diff --git "a/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/train.6.log" "b/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/train.6.log" new file mode 100644--- /dev/null +++ "b/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/train.6.log" @@ -0,0 +1,5571 @@ +# Running on gpua003.delta.ncsa.illinois.edu +# Started at Wed Jul 5 22:37:23 CDT 2023 +# SLURMD_NODENAME=gpua003 +# SLURM_CLUSTER_NAME=delta +# SLURM_CONF=/var/spool/slurmd/conf-cache/slurm.conf +# SLURM_CPUS_ON_NODE=64 +# SLURM_CPUS_PER_TASK=64 +# SLURM_EXPORT_ENV=PATH +# SLURM_GET_USER_ENV=1 +# SLURM_GPUS_ON_NODE=4 +# SLURM_GTIDS=0 +# SLURM_JOBID=2132611 +# SLURM_JOB_ACCOUNT=bbjs-delta-gpu +# SLURM_JOB_CPUS_PER_NODE='64(x16)' +# SLURM_JOB_GID=202 +# SLURM_JOB_GPUS=0,1,2,3 +# SLURM_JOB_ID=2132611 +# SLURM_JOB_NAME=exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/train.log +# SLURM_JOB_NODELIST='gpua[003,005,010,025,028-029,031,035,053,055,057,060,074,087,090,098]' +# SLURM_JOB_NUM_NODES=16 +# SLURM_JOB_PARTITION=gpuA100x4 +# SLURM_JOB_QOS=bbjs-delta-gpu +# SLURM_JOB_UID=68077 +# SLURM_JOB_USER=peng6 +# SLURM_LOCALID=0 +# SLURM_MEM_PER_NODE=240000 +# SLURM_NNODES=16 +# SLURM_NODEID=0 +# SLURM_NODELIST='gpua[003,005,010,025,028-029,031,035,053,055,057,060,074,087,090,098]' +# SLURM_NODE_ALIASES='(null)' +# SLURM_OPEN_MODE=a +# SLURM_PRIO_PROCESS=0 +# SLURM_PROCID=0 +# SLURM_SUBMIT_DIR=/scratch/bbjs/peng6/espnet-whisper-public/egs2/mixed_v3/s2t1 +# SLURM_SUBMIT_HOST=dt-login02.delta.internal.ncsa.edu +# SLURM_TASKS_PER_NODE='1(x16)' +# SLURM_TASK_PID=350544 +# SLURM_TOPOLOGY_ADDR=ss00.ss05.gpua003 +# SLURM_TOPOLOGY_ADDR_PATTERN=switch.switch.node +# SLURM_WORKING_CLUSTER=delta:dt-sched:6817:9728:109 +# srun --export=ALL python3 -m espnet2.bin.s2t_train --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000 --config conf/train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/speech_shape --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type 
dump/raw/dev/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-whisper-public/egs2/mixed_v3/s2t1/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/.dist_init_409154d5-fd37-4757-b90c-3838c14071d0 +/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/bin/python3 /scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000 --config conf/train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/speech_shape --fol/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/bin/python3 /scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000 --config conf/train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/speech_shape --fol/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/bin/python3 /scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000 --config conf/train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file 
+[gpua003:0/64] 2023-07-05 22:40:37,448 (distributed_c10d:319) INFO: Added key: store_based_barrier_key:1 to store for rank: 0
+[gpua003:0/64] 2023-07-05 22:40:38,431 (distributed_c10d:353) INFO: Rank 0: Completed store-based barrier for key:store_based_barrier_key:1 with 64 nodes.
+[gpua003:0/64] 2023-07-05 22:40:38,458 (s2t:483) INFO: Vocabulary size: 50002
+[gpua003:0/64] 2023-07-05 22:40:52,612 (abs_task:1201) INFO: pytorch.version=1.13.1, cuda.available=True, cudnn.version=8500, cudnn.benchmark=False, cudnn.deterministic=True
+[gpua003:0/64] 2023-07-05 22:40:52,621 (abs_task:1202) INFO: Model structure:
+ESPnetS2TModel(
+  (frontend): DefaultFrontend(
+    (stft): Stft(n_fft=512, win_length=400, hop_length=160, center=True, normalized=False, onesided=True)
+    (frontend): Frontend()
+    (logmel): LogMel(sr=16000, n_fft=512, n_mels=80, fmin=0, fmax=8000.0, htk=False)
+  )
+  (specaug): SpecAug(
+    (freq_mask): MaskAlongAxis(mask_width_range=[0, 27], num_mask=2, axis=freq)
+    (time_mask): MaskAlongAxisVariableMaxWidth(mask_width_ratio_range=[0.0, 0.05], num_mask=10, axis=time)
+  )
+  (normalize): GlobalMVN(stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz, norm_means=True, norm_vars=True)
+  (encoder): TransformerEncoder(
+    (embed): Conv2dSubsampling(
+      (conv): Sequential(
+        (0): Conv2d(1, 1024, kernel_size=(3, 3), stride=(2, 2))
+        (1): ReLU()
+        (2): Conv2d(1024, 1024, kernel_size=(3, 3), stride=(2, 2))
+        (3): ReLU()
+      )
+      (out): Sequential(
+        (0): Linear(in_features=19456, out_features=1024, bias=True)
+        (1): PositionalEncoding(
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+      )
+    )
+    (encoders): MultiSequential(
+      (0): EncoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (1): EncoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (2): EncoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (3): EncoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (4): EncoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (5): EncoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (6): EncoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (7): EncoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (8): EncoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (9): EncoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (10): EncoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (11): EncoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (12): EncoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (13): EncoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (14): EncoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (15): EncoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (16): EncoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (17): EncoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (18): EncoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (19): EncoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (20): EncoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (21): EncoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (22): EncoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (23): EncoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+    )
+    (after_norm): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+  )
+  (decoder): TransformerDecoder(
+    (embed): Sequential(
+      (0): Embedding(50002, 1024)
+      (1): PositionalEncoding(
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+    )
+    (after_norm): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+    (output_layer): Linear(in_features=1024, out_features=50002, bias=True)
+    (decoders): MultiSequential(
+      (0): DecoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (src_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (1): DecoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (src_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (2): DecoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (src_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (3): DecoderLayer(
+        (self_attn): MultiHeadedAttention(
(linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (4): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (5): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), 
eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (6): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (7): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (8): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): 
LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (9): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (10): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (11): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + 
(w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (12): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (13): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (14): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): 
Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (15): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (16): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (17): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): 
Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (18): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (19): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (20): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): 
Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (21): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (22): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (23): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): 
+  (criterion_att): LabelSmoothingLoss(
+    (criterion): KLDivLoss()
+  )
+  (ctc): CTC(
+    (ctc_lo): Linear(in_features=1024, out_features=50002, bias=True)
+    (ctc_loss): CTCLoss()
+  )
+)
+
+Model summary:
+    Class Name: ESPnetS2TModel
+    Total Number of model parameters: 888.51 M
+    Number of trainable parameters: 888.51 M (100.0%)
+    Size: 3.55 GB
+    Type: torch.float32
+[gpua003:0/64] 2023-07-05 22:40:52,621 (abs_task:1205) INFO: Optimizer:
+AdamW (
+Parameter Group 0
+    amsgrad: False
+    betas: [0.9, 0.98]
+    capturable: False
+    eps: 1e-06
+    foreach: None
+    initial_lr: 0.00025
+    lr: 2.5e-08
+    maximize: False
+    weight_decay: 0.0
+)
+[gpua003:0/64] 2023-07-05 22:40:52,621 (abs_task:1206) INFO: Scheduler: WarmupLR(warmup_steps=10000)
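[Note on the two entries above: the parameter group reports lr=2.5e-08 even though initial_lr is 2.5e-4; that is the warmup schedule evaluated at its first step, not a misconfiguration. A minimal sketch, assuming the Noam-style rule that espnet2's WarmupLR is understood to apply:]

# Sketch (not from the log); assumes espnet2's WarmupLR rule:
#   lr = base_lr * warmup_steps**0.5 * min(step**-0.5, step * warmup_steps**-1.5)
def warmup_lr(step: int, base_lr: float = 2.5e-4, warmup_steps: int = 10000) -> float:
    return base_lr * warmup_steps**0.5 * min(step**-0.5, step * warmup_steps**-1.5)

print(warmup_lr(1))      # 2.5e-08, the value shown in Parameter Group 0 above
print(warmup_lr(10000))  # 2.5e-04, peak lr reached at warmup_steps
print(warmup_lr(40000))  # 1.25e-04, decays as step**-0.5 after warmup

# The model summary is also self-consistent: 888.51 M float32 parameters
# at 4 bytes each is about 3.55 GB.
print(888.51e6 * 4 / 1e9)  # ~3.55

[Under this rule the lr peaks at the configured 2.5e-4 exactly at warmup_steps=10000 and then decays as step**-0.5; since this run resumes from epoch 14, the live lr during training differs from the step-1 value printed here.]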
+[gpua003:0/64] 2023-07-05 22:40:52,630 (abs_task:1215) INFO: Saving the configuration in exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/config.yaml
+[gpua003:0/64] 2023-07-05 22:40:53,329 (abs_task:1272) INFO: Loading pretrained params from /scratch/bbjs/peng6/espnet-whisper-public/egs2/mixed_v2/s2t1/exp/s2t_train_s2t_transformer_conv2d_size1024_e18_d18_lr5e-4_warmup20k_raw_bpe50000/valid.acc.ave.pth
+[gpua003:0/64] 2023-07-05 22:41:01,373 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua003:0/64] 2023-07-05 22:41:01,580 (abs_task:1570) INFO: [valid] dataset:
+ESPnetDataset(
+  speech: {"path": "dump/raw/dev/wav.scp", "type": "kaldi_ark"}
+  text_prev: {"path": "dump/raw/dev/text.prev", "type": "text"}
+  text_ctc: {"path": "dump/raw/dev/text.ctc", "type": "text"}
+  text: {"path": "dump/raw/dev/text", "type": "text"}
+  preprocess: )
+[gpua003:0/64] 2023-07-05 22:41:01,580 (abs_task:1571) INFO: [valid] Batch sampler: UnsortedBatchSampler(N-batch=1012, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/valid/speech_shape,
+[gpua003:0/64] 2023-07-05 22:41:01,582 (abs_task:1572) INFO: [valid] mini-batch sizes summary: N-batch=1012, mean=128.1, min=128, max=129
+[gpua003:0/64] 2023-07-05 22:41:02,091 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua003:0/64] 2023-07-05 22:41:02,408 (abs_task:1570) INFO: [plot_att] dataset:
+ESPnetDataset(
+  speech: {"path": "dump/raw/dev/wav.scp", "type": "kaldi_ark"}
+  text_prev: {"path": "dump/raw/dev/text.prev", "type": "text"}
+  text_ctc: {"path": "dump/raw/dev/text.ctc", "type": "text"}
+  text: {"path": "dump/raw/dev/text", "type": "text"}
+  preprocess: )
+[gpua003:0/64] 2023-07-05 22:41:02,409 (abs_task:1571) INFO: [plot_att] Batch sampler: UnsortedBatchSampler(N-batch=129591, batch_size=1, key_file=exp/s2t_stats_raw_bpe50000/valid/speech_shape,
+[gpua003:0/64] 2023-07-05 22:41:02,409 (abs_task:1572) INFO: [plot_att] mini-batch sizes summary: N-batch=3, mean=1.0, min=1, max=1
+[gpua003:0/64] 2023-07-05 22:41:33,411 (trainer:159) INFO: The training was resumed using exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/checkpoint.pth
+gpua003:350633:350633 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.3<0>
+gpua003:350633:350633 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpua003:350633:350633 [0] NCCL INFO cudaDriverVersion 12010
+NCCL version 2.14.3+cuda11.7
+[gpua003:0/64] 2023-07-05 22:41:38,247 (trainer:284) INFO: 14/100epoch started
+[gpua003:0/64] 2023-07-05 22:41:38,292 (multiple_iter_factory:32) INFO: Building 0th iter-factory...
+[gpua003:0/64] 2023-07-05 22:41:57,218 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua003:0/64] 2023-07-05 22:42:00,682 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"}
+  preprocess: )
+[gpua003:0/64] 2023-07-05 22:42:00,683 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0,
+[gpua003:0/64] 2023-07-05 22:42:00,689 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+gpua031:1680700:1680700 [0] NCCL INFO cudaDriverVersion 12010
+gpua031:1680700:1680700 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.31<0>
+gpua031:1680700:1680700 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpua031:1680700:1680773 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.31<0>
+gpua031:1680700:1680773 [0] NCCL INFO Using network IB
+gpua031:1680700:1680773 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000
+gpua031:1680700:1680773 [0] NCCL INFO Trees [0] 25/28/-1->24->16 [1] 25/-1/-1->24->21
+gpua031:1680700:1680773 [0] NCCL INFO Channel 00/0 : 23[c7000] -> 24[7000] [receive] via NET/IB/0
+gpua031:1680700:1680773 [0] NCCL INFO Channel 01/0 : 23[c7000] -> 24[7000] [receive] via NET/IB/0
+gpua031:1680700:1680773 [0] NCCL INFO Channel 00/0 : 24[7000] -> 25[46000] via P2P/IPC/read
+gpua031:1680700:1680773 [0] NCCL INFO Channel 01/0 : 24[7000] -> 25[46000] via P2P/IPC/read
+gpua031:1680700:1680773 [0] NCCL INFO Connected all rings
+gpua031:1680700:1680773 [0] NCCL INFO Channel 01/0 : 21[46000] -> 24[7000] [receive] via NET/IB/0
+gpua031:1680700:1680773 [0] NCCL INFO Channel 00/0 : 24[7000] -> 28[7000] [send] via NET/IB/0
+gpua031:1680700:1680773 [0] NCCL INFO Channel 00/0 : 16[7000] -> 24[7000] [receive] via NET/IB/0
+gpua031:1680700:1680773 [0] NCCL INFO Channel 00/0 : 24[7000] -> 16[7000] [send] via NET/IB/0
+gpua031:1680700:1680773 [0] NCCL INFO Channel 00/0 : 28[7000] -> 24[7000] [receive] via NET/IB/0
+gpua031:1680700:1680773 [0] NCCL INFO Channel 01/0 : 24[7000] -> 21[46000] [send] via NET/IB/0
+gpua031:1680700:1680773 [0] NCCL INFO Connected all trees
+gpua031:1680700:1680773 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua031:1680700:1680773 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua031:1680700:1680773 [0] NCCL INFO comm 0xb9862e50 rank 24 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE
[... analogous NCCL init blocks for the other ranks (per-rank tree/ring topology, P2P/IPC and NET/IB channels, comm Init COMPLETE) omitted ...]
+gpua087:2330954:2330954 [1] NCCL INFO cudaDriverVersion 12010
+gpua087:2330954:2330954 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.87<0>
+gpua087:2330954:2330954 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpua087:2330954:2331029 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.87<0>
+gpua087:2330954:2331029 [1] NCCL INFO Using network IB
+gpua087:2330954:2331029 [1] NCCL INFO Setting
affinity for GPU 1 to ffff,00000000 +gpua087:2330954:2331029 [1] NCCL INFO Trees [0] 54/-1/-1->53->52 [1] 54/56/-1->53->52 +gpua087:2330954:2331029 [1] NCCL INFO Channel 00/0 : 53[46000] -> 54[85000] via P2P/IPC/read +gpua087:2330954:2331029 [1] NCCL INFO Channel 01/0 : 53[46000] -> 54[85000] via P2P/IPC/read +gpua087:2330954:2331029 [1] NCCL INFO Connected all rings +gpua087:2330954:2331029 [1] NCCL INFO Channel 01/0 : 53[46000] -> 56[7000] [send] via NET/IB/0 +gpua087:2330954:2331029 [1] NCCL INFO Channel 01/0 : 56[7000] -> 53[46000] [receive] via NET/IB/0 +gpua087:2330954:2331029 [1] NCCL INFO Channel 00/0 : 53[46000] -> 52[7000] via P2P/IPC/read +gpua087:2330954:2331029 [1] NCCL INFO Channel 01/0 : 53[46000] -> 52[7000] via P2P/IPC/read +gpua087:2330954:2331029 [1] NCCL INFO Connected all trees +gpua087:2330954:2331029 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua087:2330954:2331029 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua087:2330954:2331029 [1] NCCL INFO comm 0xbc380f30 rank 53 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpua028:3269324:3269324 [3] NCCL INFO cudaDriverVersion 12010 +gpua028:3269324:3269324 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.28<0> +gpua028:3269324:3269324 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua028:3269324:3269401 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.28<0> +gpua028:3269324:3269401 [3] NCCL INFO Using network IB +gpua028:3269324:3269401 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpua028:3269324:3269401 [3] NCCL INFO Trees [0] -1/-1/-1->19->18 [1] -1/-1/-1->19->18 +gpua028:3269324:3269401 [3] NCCL INFO Channel 00/0 : 19[c7000] -> 20[7000] [send] via NET/IB/0 +gpua028:3269324:3269401 [3] NCCL INFO Channel 01/0 : 19[c7000] -> 20[7000] [send] via NET/IB/0 +gpua028:3269324:3269401 [3] NCCL INFO Connected all rings +gpua028:3269324:3269401 [3] NCCL INFO Channel 00/0 : 19[c7000] -> 18[85000] via P2P/IPC/read +gpua028:3269324:3269401 [3] NCCL INFO Channel 01/0 : 19[c7000] -> 18[85000] via P2P/IPC/read +gpua028:3269324:3269401 [3] NCCL INFO Connected all trees +gpua028:3269324:3269401 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua028:3269324:3269401 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua028:3269324:3269401 [3] NCCL INFO comm 0x50758ff0 rank 19 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpua053:959076:959076 [2] NCCL INFO cudaDriverVersion 12010 +gpua053:959076:959076 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.53<0> +gpua053:959076:959076 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua053:959076:959150 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.53<0> +gpua053:959076:959150 [2] NCCL INFO Using network IB +gpua053:959076:959150 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpua053:959076:959150 [2] NCCL INFO Trees [0] 35/-1/-1->34->33 [1] 35/-1/-1->34->33 +gpua053:959076:959150 [2] NCCL INFO Channel 00/0 : 34[85000] -> 35[c7000] via P2P/IPC/read +gpua053:959076:959150 [2] NCCL INFO Channel 01/0 : 34[85000] -> 35[c7000] via P2P/IPC/read +gpua053:959076:959150 [2] NCCL INFO Connected all rings +gpua053:959076:959150 [2] NCCL INFO Channel 00/0 : 34[85000] -> 33[46000] via P2P/IPC/read +gpua053:959076:959150 [2] NCCL INFO Channel 01/0 : 34[85000] -> 33[46000] via P2P/IPC/read +gpua053:959076:959150 [2] NCCL INFO Connected all trees +gpua053:959076:959150 [2] NCCL INFO 
threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua053:959076:959150 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua053:959076:959150 [2] NCCL INFO comm 0xa5547430 rank 34 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpua028:3269322:3269322 [1] NCCL INFO cudaDriverVersion 12010 +gpua028:3269322:3269322 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.28<0> +gpua028:3269322:3269322 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua028:3269322:3269404 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.28<0> +gpua028:3269322:3269404 [1] NCCL INFO Using network IB +gpua028:3269322:3269404 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpua028:3269322:3269404 [1] NCCL INFO Trees [0] 18/8/-1->17->16 [1] 18/-1/-1->17->16 +gpua028:3269322:3269404 [1] NCCL INFO Channel 00/0 : 17[46000] -> 18[85000] via P2P/IPC/read +gpua028:3269322:3269404 [1] NCCL INFO Channel 01/0 : 17[46000] -> 18[85000] via P2P/IPC/read +gpua028:3269322:3269404 [1] NCCL INFO Connected all rings +gpua028:3269322:3269404 [1] NCCL INFO Channel 00/0 : 8[7000] -> 17[46000] [receive] via NET/IB/0 +gpua028:3269322:3269404 [1] NCCL INFO Channel 00/0 : 17[46000] -> 8[7000] [send] via NET/IB/0 +gpua028:3269322:3269404 [1] NCCL INFO Channel 00/0 : 17[46000] -> 16[7000] via P2P/IPC/read +gpua028:3269322:3269404 [1] NCCL INFO Channel 01/0 : 17[46000] -> 16[7000] via P2P/IPC/read +gpua028:3269322:3269404 [1] NCCL INFO Connected all trees +gpua028:3269322:3269404 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua028:3269322:3269404 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua028:3269322:3269404 [1] NCCL INFO comm 0x50ff9ba0 rank 17 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpua053:959074:959074 [0] NCCL INFO cudaDriverVersion 12010 +gpua053:959074:959074 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.53<0> +gpua053:959074:959074 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua053:959074:959149 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.53<0> +gpua053:959074:959149 [0] NCCL INFO Using network IB +gpua053:959074:959149 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpua053:959074:959149 [0] NCCL INFO Trees [0] 33/48/-1->32->0 [1] 33/-1/-1->32->36 +gpua053:959074:959149 [0] NCCL INFO Channel 00/0 : 31[c7000] -> 32[7000] [receive] via NET/IB/0 +gpua053:959074:959149 [0] NCCL INFO Channel 01/0 : 31[c7000] -> 32[7000] [receive] via NET/IB/0 +gpua053:959074:959149 [0] NCCL INFO Channel 00/0 : 32[7000] -> 33[46000] via P2P/IPC/read +gpua053:959074:959149 [0] NCCL INFO Channel 01/0 : 32[7000] -> 33[46000] via P2P/IPC/read +gpua053:959074:959149 [0] NCCL INFO Connected all rings +gpua053:959074:959149 [0] NCCL INFO Channel 01/0 : 32[7000] -> 36[7000] [send] via NET/IB/0 +gpua053:959074:959149 [0] NCCL INFO Channel 00/0 : 32[7000] -> 48[7000] [send] via NET/IB/0 +gpua053:959074:959149 [0] NCCL INFO Channel 00/0 : 0[7000] -> 32[7000] [receive] via NET/IB/0 +gpua053:959074:959149 [0] NCCL INFO Channel 00/0 : 32[7000] -> 0[7000] [send] via NET/IB/0 +gpua053:959074:959149 [0] NCCL INFO Channel 00/0 : 48[7000] -> 32[7000] [receive] via NET/IB/0 +gpua053:959074:959149 [0] NCCL INFO Channel 01/0 : 36[7000] -> 32[7000] [receive] via NET/IB/0 +gpua053:959074:959149 [0] NCCL INFO Connected all trees +gpua053:959074:959149 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua053:959074:959149 [0] 
NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua053:959074:959149 [0] NCCL INFO comm 0x50589df0 rank 32 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpua028:3269321:3269321 [0] NCCL INFO cudaDriverVersion 12010 +gpua028:3269321:3269321 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.28<0> +gpua028:3269321:3269321 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua028:3269321:3269403 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.28<0> +gpua028:3269321:3269403 [0] NCCL INFO Using network IB +gpua028:3269321:3269403 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpua028:3269321:3269403 [0] NCCL INFO Trees [0] 17/24/-1->16->33 [1] 17/-1/-1->16->20 +gpua028:3269321:3269403 [0] NCCL INFO Channel 00/0 : 15[c7000] -> 16[7000] [receive] via NET/IB/0 +gpua028:3269321:3269403 [0] NCCL INFO Channel 01/0 : 15[c7000] -> 16[7000] [receive] via NET/IB/0 +gpua028:3269321:3269403 [0] NCCL INFO Channel 00/0 : 16[7000] -> 17[46000] via P2P/IPC/read +gpua028:3269321:3269403 [0] NCCL INFO Channel 01/0 : 16[7000] -> 17[46000] via P2P/IPC/read +gpua028:3269321:3269403 [0] NCCL INFO Connected all rings +gpua028:3269321:3269403 [0] NCCL INFO Channel 01/0 : 16[7000] -> 20[7000] [send] via NET/IB/0 +gpua028:3269321:3269403 [0] NCCL INFO Channel 00/0 : 16[7000] -> 24[7000] [send] via NET/IB/0 +gpua028:3269321:3269403 [0] NCCL INFO Channel 00/0 : 16[7000] -> 33[46000] [send] via NET/IB/0 +gpua028:3269321:3269403 [0] NCCL INFO Channel 00/0 : 33[46000] -> 16[7000] [receive] via NET/IB/0 +gpua028:3269321:3269403 [0] NCCL INFO Channel 00/0 : 24[7000] -> 16[7000] [receive] via NET/IB/0 +gpua028:3269321:3269403 [0] NCCL INFO Channel 01/0 : 20[7000] -> 16[7000] [receive] via NET/IB/0 +gpua028:3269321:3269403 [0] NCCL INFO Connected all trees +gpua028:3269321:3269403 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua028:3269321:3269403 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua028:3269321:3269403 [0] NCCL INFO comm 0xc37df860 rank 16 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpua028:3269323:3269323 [2] NCCL INFO cudaDriverVersion 12010 +gpua028:3269323:3269323 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.28<0> +gpua028:3269323:3269323 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua028:3269323:3269402 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.28<0> +gpua028:3269323:3269402 [2] NCCL INFO Using network IB +gpua028:3269323:3269402 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpua028:3269323:3269402 [2] NCCL INFO Trees [0] 19/-1/-1->18->17 [1] 19/-1/-1->18->17 +gpua028:3269323:3269402 [2] NCCL INFO Channel 00/0 : 18[85000] -> 19[c7000] via P2P/IPC/read +gpua028:3269323:3269402 [2] NCCL INFO Channel 01/0 : 18[85000] -> 19[c7000] via P2P/IPC/read +gpua028:3269323:3269402 [2] NCCL INFO Connected all rings +gpua028:3269323:3269402 [2] NCCL INFO Channel 00/0 : 18[85000] -> 17[46000] via P2P/IPC/read +gpua028:3269323:3269402 [2] NCCL INFO Channel 01/0 : 18[85000] -> 17[46000] via P2P/IPC/read +gpua028:3269323:3269402 [2] NCCL INFO Connected all trees +gpua028:3269323:3269402 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua028:3269323:3269402 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua028:3269323:3269402 [2] NCCL INFO comm 0x4fe1d010 rank 18 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpua053:959077:959077 [3] NCCL INFO 
cudaDriverVersion 12010 +gpua053:959077:959077 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.53<0> +gpua053:959077:959077 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua053:959077:959151 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.53<0> +gpua053:959077:959151 [3] NCCL INFO Using network IB +gpua053:959077:959151 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpua053:959077:959151 [3] NCCL INFO Trees [0] -1/-1/-1->35->34 [1] -1/-1/-1->35->34 +gpua053:959077:959151 [3] NCCL INFO Channel 00/0 : 35[c7000] -> 36[7000] [send] via NET/IB/0 +gpua053:959077:959151 [3] NCCL INFO Channel 01/0 : 35[c7000] -> 36[7000] [send] via NET/IB/0 +gpua053:959077:959151 [3] NCCL INFO Connected all rings +gpua053:959077:959151 [3] NCCL INFO Channel 00/0 : 35[c7000] -> 34[85000] via P2P/IPC/read +gpua053:959077:959151 [3] NCCL INFO Channel 01/0 : 35[c7000] -> 34[85000] via P2P/IPC/read +gpua053:959077:959151 [3] NCCL INFO Connected all trees +gpua053:959077:959151 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua053:959077:959151 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua053:959077:959151 [3] NCCL INFO comm 0x8f7ecf20 rank 35 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpua053:959075:959075 [1] NCCL INFO cudaDriverVersion 12010 +gpua053:959075:959075 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.53<0> +gpua053:959075:959075 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua053:959075:959152 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.53<0> +gpua053:959075:959152 [1] NCCL INFO Using network IB +gpua053:959075:959152 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpua053:959075:959152 [1] NCCL INFO Trees [0] 34/16/-1->33->32 [1] 34/-1/-1->33->32 +gpua053:959075:959152 [1] NCCL INFO Channel 00/0 : 33[46000] -> 34[85000] via P2P/IPC/read +gpua053:959075:959152 [1] NCCL INFO Channel 01/0 : 33[46000] -> 34[85000] via P2P/IPC/read +gpua053:959075:959152 [1] NCCL INFO Connected all rings +gpua053:959075:959152 [1] NCCL INFO Channel 00/0 : 16[7000] -> 33[46000] [receive] via NET/IB/0 +gpua053:959075:959152 [1] NCCL INFO Channel 00/0 : 33[46000] -> 16[7000] [send] via NET/IB/0 +gpua053:959075:959152 [1] NCCL INFO Channel 00/0 : 33[46000] -> 32[7000] via P2P/IPC/read +gpua053:959075:959152 [1] NCCL INFO Channel 01/0 : 33[46000] -> 32[7000] via P2P/IPC/read +gpua053:959075:959152 [1] NCCL INFO Connected all trees +gpua053:959075:959152 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua053:959075:959152 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua053:959075:959152 [1] NCCL INFO comm 0x50f9bf70 rank 33 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpua055:3866106:3866106 [3] NCCL INFO cudaDriverVersion 12010 +gpua055:3866106:3866106 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.55<0> +gpua055:3866106:3866106 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua055:3866106:3866180 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.55<0> +gpua055:3866106:3866180 [3] NCCL INFO Using network IB +gpua055:3866106:3866180 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpua055:3866106:3866180 [3] NCCL INFO Trees [0] -1/-1/-1->39->38 [1] -1/-1/-1->39->38 +gpua055:3866106:3866180 [3] NCCL INFO Channel 00/0 : 39[c7000] -> 40[7000] [send] via NET/IB/0 +gpua055:3866106:3866180 [3] NCCL INFO Channel 01/0 : 
39[c7000] -> 40[7000] [send] via NET/IB/0 +gpua055:3866106:3866180 [3] NCCL INFO Connected all rings +gpua055:3866106:3866180 [3] NCCL INFO Channel 00/0 : 39[c7000] -> 38[85000] via P2P/IPC/read +gpua055:3866106:3866180 [3] NCCL INFO Channel 01/0 : 39[c7000] -> 38[85000] via P2P/IPC/read +gpua055:3866106:3866180 [3] NCCL INFO Connected all trees +gpua055:3866106:3866180 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua055:3866106:3866180 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua055:3866106:3866180 [3] NCCL INFO comm 0xb731bb50 rank 39 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpua055:3866104:3866104 [1] NCCL INFO cudaDriverVersion 12010 +gpua055:3866104:3866104 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.55<0> +gpua055:3866104:3866104 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua055:3866104:3866182 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.55<0> +gpua055:3866104:3866182 [1] NCCL INFO Using network IB +gpua055:3866104:3866182 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpua055:3866104:3866182 [1] NCCL INFO Trees [0] 38/-1/-1->37->36 [1] 38/40/-1->37->36 +gpua055:3866104:3866182 [1] NCCL INFO Channel 00/0 : 37[46000] -> 38[85000] via P2P/IPC/read +gpua055:3866104:3866182 [1] NCCL INFO Channel 01/0 : 37[46000] -> 38[85000] via P2P/IPC/read +gpua055:3866104:3866182 [1] NCCL INFO Connected all rings +gpua055:3866104:3866182 [1] NCCL INFO Channel 01/0 : 37[46000] -> 40[7000] [send] via NET/IB/0 +gpua055:3866104:3866182 [1] NCCL INFO Channel 01/0 : 40[7000] -> 37[46000] [receive] via NET/IB/0 +gpua055:3866104:3866182 [1] NCCL INFO Channel 00/0 : 37[46000] -> 36[7000] via P2P/IPC/read +gpua055:3866104:3866182 [1] NCCL INFO Channel 01/0 : 37[46000] -> 36[7000] via P2P/IPC/read +gpua055:3866104:3866182 [1] NCCL INFO Connected all trees +gpua055:3866104:3866182 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua055:3866104:3866182 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua055:3866104:3866182 [1] NCCL INFO comm 0x4ff24650 rank 37 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpua055:3866103:3866103 [0] NCCL INFO cudaDriverVersion 12010 +gpua055:3866103:3866103 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.55<0> +gpua055:3866103:3866103 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua055:3866103:3866183 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.55<0> +gpua055:3866103:3866183 [0] NCCL INFO Using network IB +gpua055:3866103:3866183 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpua055:3866103:3866183 [0] NCCL INFO Trees [0] 37/-1/-1->36->41 [1] 37/32/-1->36->44 +gpua055:3866103:3866183 [0] NCCL INFO Channel 00/0 : 35[c7000] -> 36[7000] [receive] via NET/IB/0 +gpua055:3866103:3866183 [0] NCCL INFO Channel 01/0 : 35[c7000] -> 36[7000] [receive] via NET/IB/0 +gpua055:3866103:3866183 [0] NCCL INFO Channel 00/0 : 36[7000] -> 37[46000] via P2P/IPC/read +gpua055:3866103:3866183 [0] NCCL INFO Channel 01/0 : 36[7000] -> 37[46000] via P2P/IPC/read +gpua055:3866103:3866183 [0] NCCL INFO Connected all rings +gpua055:3866103:3866183 [0] NCCL INFO Channel 01/0 : 32[7000] -> 36[7000] [receive] via NET/IB/0 +gpua055:3866103:3866183 [0] NCCL INFO Channel 00/0 : 36[7000] -> 41[46000] [send] via NET/IB/0 +gpua055:3866103:3866183 [0] NCCL INFO Channel 01/0 : 36[7000] -> 44[7000] [send] via NET/IB/0 +gpua055:3866103:3866183 
[0] NCCL INFO Channel 01/0 : 44[7000] -> 36[7000] [receive] via NET/IB/0 +gpua055:3866103:3866183 [0] NCCL INFO Channel 00/0 : 41[46000] -> 36[7000] [receive] via NET/IB/0 +gpua055:3866103:3866183 [0] NCCL INFO Channel 01/0 : 36[7000] -> 32[7000] [send] via NET/IB/0 +gpua055:3866103:3866183 [0] NCCL INFO Connected all trees +gpua055:3866103:3866183 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua055:3866103:3866183 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua055:3866103:3866183 [0] NCCL INFO comm 0x8783410 rank 36 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpua055:3866105:3866105 [2] NCCL INFO cudaDriverVersion 12010 +gpua055:3866105:3866105 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.55<0> +gpua055:3866105:3866105 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua055:3866105:3866181 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.55<0> +gpua055:3866105:3866181 [2] NCCL INFO Using network IB +gpua055:3866105:3866181 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpua055:3866105:3866181 [2] NCCL INFO Trees [0] 39/-1/-1->38->37 [1] 39/-1/-1->38->37 +gpua055:3866105:3866181 [2] NCCL INFO Channel 00/0 : 38[85000] -> 39[c7000] via P2P/IPC/read +gpua055:3866105:3866181 [2] NCCL INFO Channel 01/0 : 38[85000] -> 39[c7000] via P2P/IPC/read +gpua055:3866105:3866181 [2] NCCL INFO Connected all rings +gpua055:3866105:3866181 [2] NCCL INFO Channel 00/0 : 38[85000] -> 37[46000] via P2P/IPC/read +gpua055:3866105:3866181 [2] NCCL INFO Channel 01/0 : 38[85000] -> 37[46000] via P2P/IPC/read +gpua055:3866105:3866181 [2] NCCL INFO Connected all trees +gpua055:3866105:3866181 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua055:3866105:3866181 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua055:3866105:3866181 [2] NCCL INFO comm 0xa0bacc0 rank 38 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpua098:2101209:2101209 [1] NCCL INFO cudaDriverVersion 12010 +gpua098:2101209:2101209 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.98<0> +gpua098:2101209:2101209 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua098:2101209:2101288 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.98<0> +gpua098:2101209:2101288 [1] NCCL INFO Using network IB +gpua098:2101209:2101288 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpua098:2101209:2101288 [1] NCCL INFO Trees [0] 62/-1/-1->61->60 [1] 62/-1/-1->61->60 +gpua098:2101209:2101288 [1] NCCL INFO Channel 00/0 : 61[46000] -> 62[85000] via P2P/IPC/read +gpua098:2101209:2101288 [1] NCCL INFO Channel 01/0 : 61[46000] -> 62[85000] via P2P/IPC/read +gpua098:2101209:2101288 [1] NCCL INFO Connected all rings +gpua098:2101209:2101288 [1] NCCL INFO Channel 00/0 : 61[46000] -> 60[7000] via P2P/IPC/read +gpua098:2101209:2101288 [1] NCCL INFO Channel 01/0 : 61[46000] -> 60[7000] via P2P/IPC/read +gpua098:2101209:2101288 [1] NCCL INFO Connected all trees +gpua098:2101209:2101288 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua098:2101209:2101288 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua098:2101209:2101288 [1] NCCL INFO comm 0xb77452f0 rank 61 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpua098:2101208:2101208 [0] NCCL INFO cudaDriverVersion 12010 +gpua098:2101208:2101208 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.98<0> +gpua098:2101208:2101208 [0] NCCL INFO NET/Plugin : No 
plugin found (libnccl-net.so), using internal implementation +gpua098:2101208:2101291 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.98<0> +gpua098:2101208:2101291 [0] NCCL INFO Using network IB +gpua098:2101208:2101291 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpua098:2101208:2101291 [0] NCCL INFO Trees [0] 61/-1/-1->60->56 [1] 61/28/-1->60->-1 +gpua098:2101208:2101291 [0] NCCL INFO Channel 00/0 : 59[c7000] -> 60[7000] [receive] via NET/IB/0 +gpua098:2101208:2101291 [0] NCCL INFO Channel 01/0 : 59[c7000] -> 60[7000] [receive] via NET/IB/0 +gpua098:2101208:2101291 [0] NCCL INFO Channel 00/0 : 60[7000] -> 61[46000] via P2P/IPC/read +gpua098:2101208:2101291 [0] NCCL INFO Channel 01/0 : 60[7000] -> 61[46000] via P2P/IPC/read +gpua098:2101208:2101291 [0] NCCL INFO Connected all rings +gpua098:2101208:2101291 [0] NCCL INFO Channel 00/0 : 56[7000] -> 60[7000] [receive] via NET/IB/0 +gpua098:2101208:2101291 [0] NCCL INFO Channel 01/0 : 28[7000] -> 60[7000] [receive] via NET/IB/0 +gpua098:2101208:2101291 [0] NCCL INFO Channel 01/0 : 60[7000] -> 28[7000] [send] via NET/IB/0 +gpua098:2101208:2101291 [0] NCCL INFO Channel 00/0 : 60[7000] -> 56[7000] [send] via NET/IB/0 +gpua098:2101208:2101291 [0] NCCL INFO Connected all trees +gpua098:2101208:2101291 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua098:2101208:2101291 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua098:2101208:2101291 [0] NCCL INFO comm 0x8ba9dc20 rank 60 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpua098:2101210:2101210 [2] NCCL INFO cudaDriverVersion 12010 +gpua098:2101210:2101210 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.98<0> +gpua098:2101210:2101210 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua098:2101210:2101290 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.98<0> +gpua098:2101210:2101290 [2] NCCL INFO Using network IB +gpua098:2101210:2101290 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpua098:2101210:2101290 [2] NCCL INFO Trees [0] 63/-1/-1->62->61 [1] 63/-1/-1->62->61 +gpua098:2101210:2101290 [2] NCCL INFO Channel 00/0 : 62[85000] -> 63[c7000] via P2P/IPC/read +gpua098:2101210:2101290 [2] NCCL INFO Channel 01/0 : 62[85000] -> 63[c7000] via P2P/IPC/read +gpua098:2101210:2101290 [2] NCCL INFO Connected all rings +gpua098:2101210:2101290 [2] NCCL INFO Channel 00/0 : 62[85000] -> 61[46000] via P2P/IPC/read +gpua098:2101210:2101290 [2] NCCL INFO Channel 01/0 : 62[85000] -> 61[46000] via P2P/IPC/read +gpua098:2101210:2101290 [2] NCCL INFO Connected all trees +gpua098:2101210:2101290 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua098:2101210:2101290 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua098:2101210:2101290 [2] NCCL INFO comm 0xb13e4b0 rank 62 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpua090:2294100:2294100 [3] NCCL INFO cudaDriverVersion 12010 +gpua090:2294100:2294100 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.90<0> +gpua090:2294100:2294100 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua090:2294100:2294189 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.90<0> +gpua090:2294100:2294189 [3] NCCL INFO Using network IB +gpua090:2294100:2294189 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpua090:2294100:2294189 [3] NCCL INFO Trees [0] -1/-1/-1->59->58 [1] -1/-1/-1->59->58 +gpua090:2294100:2294189 [3] NCCL INFO 
Channel 00/0 : 59[c7000] -> 60[7000] [send] via NET/IB/0 +gpua090:2294100:2294189 [3] NCCL INFO Channel 01/0 : 59[c7000] -> 60[7000] [send] via NET/IB/0 +gpua090:2294100:2294189 [3] NCCL INFO Connected all rings +gpua090:2294100:2294189 [3] NCCL INFO Channel 00/0 : 59[c7000] -> 58[85000] via P2P/IPC/read +gpua090:2294100:2294189 [3] NCCL INFO Channel 01/0 : 59[c7000] -> 58[85000] via P2P/IPC/read +gpua090:2294100:2294189 [3] NCCL INFO Connected all trees +gpua090:2294100:2294189 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua090:2294100:2294189 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua090:2294100:2294189 [3] NCCL INFO comm 0x8d2a2250 rank 59 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpua090:2294099:2294099 [2] NCCL INFO cudaDriverVersion 12010 +gpua090:2294099:2294099 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.90<0> +gpua090:2294099:2294099 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua090:2294099:2294186 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.90<0> +gpua090:2294099:2294186 [2] NCCL INFO Using network IB +gpua090:2294099:2294186 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpua090:2294099:2294186 [2] NCCL INFO Trees [0] 59/-1/-1->58->57 [1] 59/-1/-1->58->57 +gpua090:2294099:2294186 [2] NCCL INFO Channel 00/0 : 58[85000] -> 59[c7000] via P2P/IPC/read +gpua090:2294099:2294186 [2] NCCL INFO Channel 01/0 : 58[85000] -> 59[c7000] via P2P/IPC/read +gpua090:2294099:2294186 [2] NCCL INFO Connected all rings +gpua090:2294099:2294186 [2] NCCL INFO Channel 00/0 : 58[85000] -> 57[46000] via P2P/IPC/read +gpua090:2294099:2294186 [2] NCCL INFO Channel 01/0 : 58[85000] -> 57[46000] via P2P/IPC/read +gpua090:2294099:2294186 [2] NCCL INFO Connected all trees +gpua090:2294099:2294186 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua090:2294099:2294186 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua090:2294099:2294186 [2] NCCL INFO comm 0x508070c0 rank 58 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpua003:350634:350634 [1] NCCL INFO cudaDriverVersion 12010 +gpua003:350634:350634 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.3<0> +gpua003:350634:350634 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua003:350634:350707 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.3<0> +gpua003:350634:350707 [1] NCCL INFO Using network IB +gpua003:350634:350707 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpua003:350634:350707 [1] NCCL INFO Trees [0] 2/-1/-1->1->0 [1] 2/-1/-1->1->0 +gpua003:350634:350707 [1] NCCL INFO Channel 00/0 : 1[46000] -> 2[85000] via P2P/IPC/read +gpua003:350634:350707 [1] NCCL INFO Channel 01/0 : 1[46000] -> 2[85000] via P2P/IPC/read +gpua003:350634:350707 [1] NCCL INFO Connected all rings +gpua003:350634:350707 [1] NCCL INFO Channel 00/0 : 1[46000] -> 0[7000] via P2P/IPC/read +gpua003:350634:350707 [1] NCCL INFO Channel 01/0 : 1[46000] -> 0[7000] via P2P/IPC/read +gpua003:350634:350707 [1] NCCL INFO Connected all trees +gpua003:350634:350707 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua003:350634:350707 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua003:350634:350707 [1] NCCL INFO comm 0xb8217e10 rank 1 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpua090:2294098:2294098 [1] NCCL INFO cudaDriverVersion 12010 +gpua090:2294098:2294098 [1] NCCL INFO Bootstrap : 
Using eth1:172.28.23.90<0> +gpua090:2294098:2294098 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua090:2294098:2294187 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.90<0> +gpua090:2294098:2294187 [1] NCCL INFO Using network IB +gpua090:2294098:2294187 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpua090:2294098:2294187 [1] NCCL INFO Trees [0] 58/52/-1->57->56 [1] 58/-1/-1->57->56 +gpua090:2294098:2294187 [1] NCCL INFO Channel 00/0 : 57[46000] -> 58[85000] via P2P/IPC/read +gpua090:2294098:2294187 [1] NCCL INFO Channel 01/0 : 57[46000] -> 58[85000] via P2P/IPC/read +gpua090:2294098:2294187 [1] NCCL INFO Connected all rings +gpua090:2294098:2294187 [1] NCCL INFO Channel 00/0 : 52[7000] -> 57[46000] [receive] via NET/IB/0 +gpua090:2294098:2294187 [1] NCCL INFO Channel 00/0 : 57[46000] -> 52[7000] [send] via NET/IB/0 +gpua090:2294098:2294187 [1] NCCL INFO Channel 00/0 : 57[46000] -> 56[7000] via P2P/IPC/read +gpua090:2294098:2294187 [1] NCCL INFO Channel 01/0 : 57[46000] -> 56[7000] via P2P/IPC/read +gpua090:2294098:2294187 [1] NCCL INFO Connected all trees +gpua090:2294098:2294187 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua090:2294098:2294187 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua090:2294098:2294187 [1] NCCL INFO comm 0xb9291470 rank 57 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpua098:2101211:2101211 [3] NCCL INFO cudaDriverVersion 12010 +gpua098:2101211:2101211 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.98<0> +gpua098:2101211:2101211 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua098:2101211:2101289 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.98<0> +gpua098:2101211:2101289 [3] NCCL INFO Using network IB +gpua098:2101211:2101289 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpua098:2101211:2101289 [3] NCCL INFO Trees [0] -1/-1/-1->63->62 [1] -1/-1/-1->63->62 +gpua098:2101211:2101289 [3] NCCL INFO Channel 00/0 : 63[c7000] -> 0[7000] [send] via NET/IB/0 +gpua098:2101211:2101289 [3] NCCL INFO Channel 01/0 : 63[c7000] -> 0[7000] [send] via NET/IB/0 +gpua098:2101211:2101289 [3] NCCL INFO Connected all rings +gpua098:2101211:2101289 [3] NCCL INFO Channel 00/0 : 63[c7000] -> 62[85000] via P2P/IPC/read +gpua098:2101211:2101289 [3] NCCL INFO Channel 01/0 : 63[c7000] -> 62[85000] via P2P/IPC/read +gpua098:2101211:2101289 [3] NCCL INFO Connected all trees +gpua098:2101211:2101289 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua098:2101211:2101289 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua098:2101211:2101289 [3] NCCL INFO comm 0xb9e844a0 rank 63 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpua003:350636:350636 [3] NCCL INFO cudaDriverVersion 12010 +gpua003:350636:350636 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.3<0> +gpua003:350636:350636 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua003:350636:350708 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.3<0> +gpua003:350636:350708 [3] NCCL INFO Using network IB +gpua003:350636:350708 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpua003:350636:350708 [3] NCCL INFO Trees [0] -1/-1/-1->3->2 [1] -1/-1/-1->3->2 +gpua003:350636:350708 [3] NCCL INFO Channel 00/0 : 3[c7000] -> 4[7000] [send] via NET/IB/0 +gpua003:350636:350708 [3] NCCL INFO Channel 01/0 : 3[c7000] -> 4[7000] [send] via 
NET/IB/0 +gpua003:350636:350708 [3] NCCL INFO Connected all rings +gpua003:350636:350708 [3] NCCL INFO Channel 00/0 : 3[c7000] -> 2[85000] via P2P/IPC/read +gpua003:350636:350708 [3] NCCL INFO Channel 01/0 : 3[c7000] -> 2[85000] via P2P/IPC/read +gpua003:350636:350708 [3] NCCL INFO Connected all trees +gpua003:350636:350708 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua003:350636:350708 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua003:350636:350708 [3] NCCL INFO comm 0x8b901f80 rank 3 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpua010:1622002:1622002 [2] NCCL INFO cudaDriverVersion 12010 +gpua010:1622002:1622002 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.10<0> +gpua010:1622002:1622002 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua010:1622002:1622073 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.10<0> +gpua010:1622002:1622073 [2] NCCL INFO Using network IB +gpua010:1622002:1622073 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpua010:1622002:1622073 [2] NCCL INFO Trees [0] 11/-1/-1->10->9 [1] 11/-1/-1->10->9 +gpua010:1622002:1622073 [2] NCCL INFO Channel 00/0 : 10[85000] -> 11[c7000] via P2P/IPC/read +gpua010:1622002:1622073 [2] NCCL INFO Channel 01/0 : 10[85000] -> 11[c7000] via P2P/IPC/read +gpua010:1622002:1622073 [2] NCCL INFO Connected all rings +gpua010:1622002:1622073 [2] NCCL INFO Channel 00/0 : 10[85000] -> 9[46000] via P2P/IPC/read +gpua010:1622002:1622073 [2] NCCL INFO Channel 01/0 : 10[85000] -> 9[46000] via P2P/IPC/read +gpua010:1622002:1622073 [2] NCCL INFO Connected all trees +gpua010:1622002:1622073 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua010:1622002:1622073 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua010:1622002:1622073 [2] NCCL INFO comm 0x95597d0 rank 10 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpua003:350633:350706 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.3<0> +gpua003:350633:350706 [0] NCCL INFO Using network IB +gpua003:350633:350706 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpua003:350633:350706 [0] NCCL INFO Channel 00/02 : 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 +gpua003:350633:350706 [0] NCCL INFO Channel 01/02 : 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 +gpua003:350633:350706 [0] NCCL INFO Trees [0] 1/32/-1->0->-1 [1] 1/-1/-1->0->4 +gpua003:350633:350706 [0] NCCL INFO Channel 00/0 : 63[c7000] -> 0[7000] [receive] via NET/IB/0 +gpua003:350633:350706 [0] NCCL INFO Channel 01/0 : 63[c7000] -> 0[7000] [receive] via NET/IB/0 +gpua003:350633:350706 [0] NCCL INFO Channel 00/0 : 0[7000] -> 1[46000] via P2P/IPC/read +gpua003:350633:350706 [0] NCCL INFO Channel 01/0 : 0[7000] -> 1[46000] via P2P/IPC/read +gpua003:350633:350706 [0] NCCL INFO Connected all rings +gpua003:350633:350706 [0] NCCL INFO Channel 01/0 : 0[7000] -> 4[7000] [send] via NET/IB/0 +gpua003:350633:350706 [0] NCCL INFO Channel 00/0 : 32[7000] -> 0[7000] [receive] via NET/IB/0 +gpua003:350633:350706 [0] NCCL INFO Channel 00/0 : 0[7000] -> 32[7000] [send] via NET/IB/0 +gpua003:350633:350706 [0] NCCL INFO Channel 01/0 : 4[7000] -> 0[7000] [receive] via NET/IB/0 +gpua003:350633:350706 [0] NCCL INFO Connected all trees +gpua003:350633:350706 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua003:350633:350706 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua003:350633:350706 [0] NCCL INFO comm 
0x505c0d10 rank 0 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpua003:350635:350635 [2] NCCL INFO cudaDriverVersion 12010 +gpua003:350635:350635 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.3<0> +gpua003:350635:350635 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua003:350635:350709 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.3<0> +gpua003:350635:350709 [2] NCCL INFO Using network IB +gpua003:350635:350709 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpua003:350635:350709 [2] NCCL INFO Trees [0] 3/-1/-1->2->1 [1] 3/-1/-1->2->1 +gpua003:350635:350709 [2] NCCL INFO Channel 00/0 : 2[85000] -> 3[c7000] via P2P/IPC/read +gpua003:350635:350709 [2] NCCL INFO Channel 01/0 : 2[85000] -> 3[c7000] via P2P/IPC/read +gpua003:350635:350709 [2] NCCL INFO Connected all rings +gpua003:350635:350709 [2] NCCL INFO Channel 00/0 : 2[85000] -> 1[46000] via P2P/IPC/read +gpua003:350635:350709 [2] NCCL INFO Channel 01/0 : 2[85000] -> 1[46000] via P2P/IPC/read +gpua003:350635:350709 [2] NCCL INFO Connected all trees +gpua003:350635:350709 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua003:350635:350709 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua003:350635:350709 [2] NCCL INFO comm 0xc165ff50 rank 2 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpua090:2294097:2294097 [0] NCCL INFO cudaDriverVersion 12010 +gpua090:2294097:2294097 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.90<0> +gpua090:2294097:2294097 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua090:2294097:2294188 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.90<0> +gpua090:2294097:2294188 [0] NCCL INFO Using network IB +gpua090:2294097:2294188 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpua090:2294097:2294188 [0] NCCL INFO Trees [0] 57/60/-1->56->48 [1] 57/-1/-1->56->53 +gpua090:2294097:2294188 [0] NCCL INFO Channel 00/0 : 55[c7000] -> 56[7000] [receive] via NET/IB/0 +gpua090:2294097:2294188 [0] NCCL INFO Channel 01/0 : 55[c7000] -> 56[7000] [receive] via NET/IB/0 +gpua090:2294097:2294188 [0] NCCL INFO Channel 00/0 : 56[7000] -> 57[46000] via P2P/IPC/read +gpua090:2294097:2294188 [0] NCCL INFO Channel 01/0 : 56[7000] -> 57[46000] via P2P/IPC/read +gpua090:2294097:2294188 [0] NCCL INFO Connected all rings +gpua090:2294097:2294188 [0] NCCL INFO Channel 01/0 : 53[46000] -> 56[7000] [receive] via NET/IB/0 +gpua090:2294097:2294188 [0] NCCL INFO Channel 00/0 : 56[7000] -> 60[7000] [send] via NET/IB/0 +gpua090:2294097:2294188 [0] NCCL INFO Channel 00/0 : 48[7000] -> 56[7000] [receive] via NET/IB/0 +gpua090:2294097:2294188 [0] NCCL INFO Channel 00/0 : 56[7000] -> 48[7000] [send] via NET/IB/0 +gpua090:2294097:2294188 [0] NCCL INFO Channel 00/0 : 60[7000] -> 56[7000] [receive] via NET/IB/0 +gpua090:2294097:2294188 [0] NCCL INFO Channel 01/0 : 56[7000] -> 53[46000] [send] via NET/IB/0 +gpua090:2294097:2294188 [0] NCCL INFO Connected all trees +gpua090:2294097:2294188 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua090:2294097:2294188 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua090:2294097:2294188 [0] NCCL INFO comm 0x4ed27c50 rank 56 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpua010:1622003:1622003 [3] NCCL INFO cudaDriverVersion 12010 +gpua010:1622003:1622003 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.10<0> +gpua010:1622003:1622003 [3] NCCL INFO NET/Plugin : No plugin 
found (libnccl-net.so), using internal implementation +gpua010:1622003:1622076 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.10<0> +gpua010:1622003:1622076 [3] NCCL INFO Using network IB +gpua010:1622003:1622076 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpua010:1622003:1622076 [3] NCCL INFO Trees [0] -1/-1/-1->11->10 [1] -1/-1/-1->11->10 +gpua010:1622003:1622076 [3] NCCL INFO Channel 00/0 : 11[c7000] -> 12[7000] [send] via NET/IB/0 +gpua010:1622003:1622076 [3] NCCL INFO Channel 01/0 : 11[c7000] -> 12[7000] [send] via NET/IB/0 +gpua010:1622003:1622076 [3] NCCL INFO Connected all rings +gpua010:1622003:1622076 [3] NCCL INFO Channel 00/0 : 11[c7000] -> 10[85000] via P2P/IPC/read +gpua010:1622003:1622076 [3] NCCL INFO Channel 01/0 : 11[c7000] -> 10[85000] via P2P/IPC/read +gpua010:1622003:1622076 [3] NCCL INFO Connected all trees +gpua010:1622003:1622076 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua010:1622003:1622076 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua010:1622003:1622076 [3] NCCL INFO comm 0x9c22310 rank 11 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpua010:1622000:1622000 [0] NCCL INFO cudaDriverVersion 12010 +gpua010:1622000:1622000 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.10<0> +gpua010:1622000:1622000 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua010:1622000:1622074 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.10<0> +gpua010:1622000:1622074 [0] NCCL INFO Using network IB +gpua010:1622000:1622074 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpua010:1622000:1622074 [0] NCCL INFO Trees [0] 9/12/-1->8->17 [1] 9/-1/-1->8->5 +gpua010:1622000:1622074 [0] NCCL INFO Channel 00/0 : 7[c7000] -> 8[7000] [receive] via NET/IB/0 +gpua010:1622000:1622074 [0] NCCL INFO Channel 01/0 : 7[c7000] -> 8[7000] [receive] via NET/IB/0 +gpua010:1622000:1622074 [0] NCCL INFO Channel 00/0 : 8[7000] -> 9[46000] via P2P/IPC/read +gpua010:1622000:1622074 [0] NCCL INFO Channel 01/0 : 8[7000] -> 9[46000] via P2P/IPC/read +gpua010:1622000:1622074 [0] NCCL INFO Connected all rings +gpua010:1622000:1622074 [0] NCCL INFO Channel 01/0 : 5[46000] -> 8[7000] [receive] via NET/IB/0 +gpua010:1622000:1622074 [0] NCCL INFO Channel 00/0 : 8[7000] -> 12[7000] [send] via NET/IB/0 +gpua010:1622000:1622074 [0] NCCL INFO Channel 00/0 : 8[7000] -> 17[46000] [send] via NET/IB/0 +gpua010:1622000:1622074 [0] NCCL INFO Channel 00/0 : 17[46000] -> 8[7000] [receive] via NET/IB/0 +gpua010:1622000:1622074 [0] NCCL INFO Channel 00/0 : 12[7000] -> 8[7000] [receive] via NET/IB/0 +gpua010:1622000:1622074 [0] NCCL INFO Channel 01/0 : 8[7000] -> 5[46000] [send] via NET/IB/0 +gpua010:1622000:1622074 [0] NCCL INFO Connected all trees +gpua010:1622000:1622074 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua010:1622000:1622074 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua010:1622000:1622074 [0] NCCL INFO comm 0xc2d78fd0 rank 8 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpua010:1622001:1622001 [1] NCCL INFO cudaDriverVersion 12010 +gpua010:1622001:1622001 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.10<0> +gpua010:1622001:1622001 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua010:1622001:1622075 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.10<0> +gpua010:1622001:1622075 [1] NCCL INFO Using network IB +gpua010:1622001:1622075 [1] 
NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpua010:1622001:1622075 [1] NCCL INFO Trees [0] 10/4/-1->9->8 [1] 10/-1/-1->9->8 +gpua010:1622001:1622075 [1] NCCL INFO Channel 00/0 : 9[46000] -> 10[85000] via P2P/IPC/read +gpua010:1622001:1622075 [1] NCCL INFO Channel 01/0 : 9[46000] -> 10[85000] via P2P/IPC/read +gpua010:1622001:1622075 [1] NCCL INFO Connected all rings +gpua010:1622001:1622075 [1] NCCL INFO Channel 00/0 : 4[7000] -> 9[46000] [receive] via NET/IB/0 +gpua010:1622001:1622075 [1] NCCL INFO Channel 00/0 : 9[46000] -> 4[7000] [send] via NET/IB/0 +gpua010:1622001:1622075 [1] NCCL INFO Channel 00/0 : 9[46000] -> 8[7000] via P2P/IPC/read +gpua010:1622001:1622075 [1] NCCL INFO Channel 01/0 : 9[46000] -> 8[7000] via P2P/IPC/read +gpua010:1622001:1622075 [1] NCCL INFO Connected all trees +gpua010:1622001:1622075 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua010:1622001:1622075 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua010:1622001:1622075 [1] NCCL INFO comm 0x8e6a9490 rank 9 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpua060:2854971:2854971 [3] NCCL INFO cudaDriverVersion 12010 +gpua060:2854971:2854971 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.60<0> +gpua060:2854971:2854971 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua060:2854971:2855041 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.60<0> +gpua060:2854971:2855041 [3] NCCL INFO Using network IB +gpua060:2854971:2855041 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpua060:2854971:2855041 [3] NCCL INFO Trees [0] -1/-1/-1->47->46 [1] -1/-1/-1->47->46 +gpua060:2854971:2855041 [3] NCCL INFO Channel 00/0 : 47[c7000] -> 48[7000] [send] via NET/IB/0 +gpua060:2854971:2855041 [3] NCCL INFO Channel 01/0 : 47[c7000] -> 48[7000] [send] via NET/IB/0 +gpua060:2854971:2855041 [3] NCCL INFO Connected all rings +gpua060:2854971:2855041 [3] NCCL INFO Channel 00/0 : 47[c7000] -> 46[85000] via P2P/IPC/read +gpua060:2854971:2855041 [3] NCCL INFO Channel 01/0 : 47[c7000] -> 46[85000] via P2P/IPC/read +gpua025:63838:63838 [2] NCCL INFO cudaDriverVersion 12010 +gpua025:63838:63838 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.25<0> +gpua025:63838:63838 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua025:63838:63912 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.25<0> +gpua025:63838:63912 [2] NCCL INFO Using network IB +gpua025:63838:63912 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpua025:63838:63912 [2] NCCL INFO Trees [0] 15/-1/-1->14->13 [1] 15/-1/-1->14->13 +gpua025:63838:63912 [2] NCCL INFO Channel 00/0 : 14[85000] -> 15[c7000] via P2P/IPC/read +gpua025:63838:63912 [2] NCCL INFO Channel 01/0 : 14[85000] -> 15[c7000] via P2P/IPC/read +gpua025:63838:63912 [2] NCCL INFO Connected all rings +gpua025:63838:63912 [2] NCCL INFO Channel 00/0 : 14[85000] -> 13[46000] via P2P/IPC/read +gpua025:63838:63912 [2] NCCL INFO Channel 01/0 : 14[85000] -> 13[46000] via P2P/IPC/read +gpua025:63838:63912 [2] NCCL INFO Connected all trees +gpua060:2854971:2855041 [3] NCCL INFO Connected all trees +gpua060:2854971:2855041 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua060:2854971:2855041 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua060:2854971:2855041 [3] NCCL INFO comm 0xb6f9a6a0 rank 47 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpua025:63838:63912 [2] NCCL INFO threadThresholds 8/8/64 | 
+gpua025:63838:63912 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua025:63838:63912 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua025:63838:63912 [2] NCCL INFO comm 0xc1f876b0 rank 14 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE
+gpua025:63837:63837 [1] NCCL INFO cudaDriverVersion 12010
+gpua025:63837:63837 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.25<0>
+gpua025:63837:63837 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpua025:63837:63913 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.25<0>
+gpua025:63837:63913 [1] NCCL INFO Using network IB
+gpua025:63837:63913 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000
+gpua025:63837:63913 [1] NCCL INFO Trees [0] 14/-1/-1->13->12 [1] 14/20/-1->13->12
+gpua025:63837:63913 [1] NCCL INFO Channel 00/0 : 13[46000] -> 14[85000] via P2P/IPC/read
+gpua025:63837:63913 [1] NCCL INFO Channel 01/0 : 13[46000] -> 14[85000] via P2P/IPC/read
+gpua025:63837:63913 [1] NCCL INFO Connected all rings
+gpua025:63837:63913 [1] NCCL INFO Channel 01/0 : 13[46000] -> 20[7000] [send] via NET/IB/0
+gpua025:63837:63913 [1] NCCL INFO Channel 01/0 : 20[7000] -> 13[46000] [receive] via NET/IB/0
+gpua025:63837:63913 [1] NCCL INFO Channel 00/0 : 13[46000] -> 12[7000] via P2P/IPC/read
+gpua025:63837:63913 [1] NCCL INFO Channel 01/0 : 13[46000] -> 12[7000] via P2P/IPC/read
+gpua025:63837:63913 [1] NCCL INFO Connected all trees
+gpua025:63837:63913 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua025:63837:63913 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua025:63837:63913 [1] NCCL INFO comm 0xa196ac90 rank 13 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE
+gpua025:63839:63839 [3] NCCL INFO cudaDriverVersion 12010
+gpua025:63839:63839 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.25<0>
+gpua025:63839:63839 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpua025:63839:63914 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.25<0>
+gpua025:63839:63914 [3] NCCL INFO Using network IB
+gpua025:63839:63914 [3] NCCL INFO Setting affinity for GPU 3 to ffff
+gpua025:63839:63914 [3] NCCL INFO Trees [0] -1/-1/-1->15->14 [1] -1/-1/-1->15->14
+gpua025:63839:63914 [3] NCCL INFO Channel 00/0 : 15[c7000] -> 16[7000] [send] via NET/IB/0
+gpua025:63839:63914 [3] NCCL INFO Channel 01/0 : 15[c7000] -> 16[7000] [send] via NET/IB/0
+gpua025:63839:63914 [3] NCCL INFO Connected all rings
+gpua025:63839:63914 [3] NCCL INFO Channel 00/0 : 15[c7000] -> 14[85000] via P2P/IPC/read
+gpua025:63839:63914 [3] NCCL INFO Channel 01/0 : 15[c7000] -> 14[85000] via P2P/IPC/read
+gpua025:63839:63914 [3] NCCL INFO Connected all trees
+gpua025:63839:63914 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua025:63839:63914 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua025:63839:63914 [3] NCCL INFO comm 0xc1e534d0 rank 15 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE
+gpua060:2854968:2854968 [0] NCCL INFO cudaDriverVersion 12010
+gpua060:2854968:2854968 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.60<0>
+gpua060:2854968:2854968 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpua060:2854968:2855043 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.60<0>
+gpua060:2854968:2855043 [0] NCCL INFO Using network IB
+gpua060:2854968:2855043 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000
+gpua060:2854968:2855043 [0] NCCL INFO Trees [0] 45/-1/-1->44->40 [1] 45/36/-1->44->29
+gpua060:2854968:2855043 [0] NCCL INFO Channel 00/0 : 43[c7000] -> 44[7000] [receive] via NET/IB/0
+gpua060:2854968:2855043 [0] NCCL INFO Channel 01/0 : 43[c7000] -> 44[7000] [receive] via NET/IB/0
+gpua060:2854968:2855043 [0] NCCL INFO Channel 00/0 : 44[7000] -> 45[46000] via P2P/IPC/read
+gpua060:2854968:2855043 [0] NCCL INFO Channel 01/0 : 44[7000] -> 45[46000] via P2P/IPC/read
+gpua060:2854968:2855043 [0] NCCL INFO Connected all rings
+gpua060:2854968:2855043 [0] NCCL INFO Channel 00/0 : 40[7000] -> 44[7000] [receive] via NET/IB/0
+gpua060:2854968:2855043 [0] NCCL INFO Channel 01/0 : 36[7000] -> 44[7000] [receive] via NET/IB/0
+gpua060:2854968:2855043 [0] NCCL INFO Channel 01/0 : 29[46000] -> 44[7000] [receive] via NET/IB/0
+gpua060:2854968:2855043 [0] NCCL INFO Channel 01/0 : 44[7000] -> 29[46000] [send] via NET/IB/0
+gpua060:2854968:2855043 [0] NCCL INFO Channel 01/0 : 44[7000] -> 36[7000] [send] via NET/IB/0
+gpua060:2854968:2855043 [0] NCCL INFO Channel 00/0 : 44[7000] -> 40[7000] [send] via NET/IB/0
+gpua060:2854968:2855043 [0] NCCL INFO Connected all trees
+gpua060:2854968:2855043 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua060:2854968:2855043 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua060:2854968:2855043 [0] NCCL INFO comm 0x9da77350 rank 44 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE
+gpua060:2854970:2854970 [2] NCCL INFO cudaDriverVersion 12010
+gpua060:2854970:2854970 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.60<0>
+gpua060:2854970:2854970 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpua060:2854970:2855044 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.60<0>
+gpua060:2854970:2855044 [2] NCCL INFO Using network IB
+gpua060:2854970:2855044 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000
+gpua060:2854970:2855044 [2] NCCL INFO Trees [0] 47/-1/-1->46->45 [1] 47/-1/-1->46->45
+gpua060:2854970:2855044 [2] NCCL INFO Channel 00/0 : 46[85000] -> 47[c7000] via P2P/IPC/read
+gpua060:2854970:2855044 [2] NCCL INFO Channel 01/0 : 46[85000] -> 47[c7000] via P2P/IPC/read
+gpua060:2854970:2855044 [2] NCCL INFO Connected all rings
+gpua060:2854970:2855044 [2] NCCL INFO Channel 00/0 : 46[85000] -> 45[46000] via P2P/IPC/read
+gpua060:2854970:2855044 [2] NCCL INFO Channel 01/0 : 46[85000] -> 45[46000] via P2P/IPC/read
+gpua060:2854970:2855044 [2] NCCL INFO Connected all trees
+gpua060:2854970:2855044 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua060:2854970:2855044 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua060:2854970:2855044 [2] NCCL INFO comm 0xb4b68d30 rank 46 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE
+gpua025:63836:63836 [0] NCCL INFO cudaDriverVersion 12010
+gpua025:63836:63836 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.25<0>
+gpua025:63836:63836 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpua025:63836:63915 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.25<0>
+gpua025:63836:63915 [0] NCCL INFO Using network IB
+gpua025:63836:63915 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000
+gpua025:63836:63915 [0] NCCL INFO Trees [0] 13/-1/-1->12->8 [1] 13/4/-1->12->28
+gpua025:63836:63915 [0] NCCL INFO Channel 00/0 : 11[c7000] -> 12[7000] [receive] via NET/IB/0
+gpua025:63836:63915 [0] NCCL INFO Channel 01/0 : 11[c7000] -> 12[7000] [receive] via NET/IB/0
+gpua025:63836:63915 [0] NCCL INFO Channel 00/0 : 12[7000] -> 13[46000] via P2P/IPC/read
+gpua025:63836:63915 [0] NCCL INFO Channel 01/0 : 12[7000] -> 13[46000] via P2P/IPC/read
+gpua025:63836:63915 [0] NCCL INFO Connected all rings
+gpua025:63836:63915 [0] NCCL INFO Channel 00/0 : 8[7000] -> 12[7000] [receive] via NET/IB/0
+gpua025:63836:63915 [0] NCCL INFO Channel 01/0 : 4[7000] -> 12[7000] [receive] via NET/IB/0
+gpua025:63836:63915 [0] NCCL INFO Channel 01/0 : 12[7000] -> 28[7000] [send] via NET/IB/0
+gpua025:63836:63915 [0] NCCL INFO Channel 01/0 : 28[7000] -> 12[7000] [receive] via NET/IB/0
+gpua025:63836:63915 [0] NCCL INFO Channel 01/0 : 12[7000] -> 4[7000] [send] via NET/IB/0
+gpua025:63836:63915 [0] NCCL INFO Channel 00/0 : 12[7000] -> 8[7000] [send] via NET/IB/0
+gpua025:63836:63915 [0] NCCL INFO Connected all trees
+gpua025:63836:63915 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua025:63836:63915 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua025:63836:63915 [0] NCCL INFO comm 0x1772ec20 rank 12 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE
+gpua060:2854969:2854969 [1] NCCL INFO cudaDriverVersion 12010
+gpua060:2854969:2854969 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.60<0>
+gpua060:2854969:2854969 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpua060:2854969:2855042 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.60<0>
+gpua060:2854969:2855042 [1] NCCL INFO Using network IB
+gpua060:2854969:2855042 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000
+gpua060:2854969:2855042 [1] NCCL INFO Trees [0] 46/-1/-1->45->44 [1] 46/52/-1->45->44
+gpua060:2854969:2855042 [1] NCCL INFO Channel 00/0 : 45[46000] -> 46[85000] via P2P/IPC/read
+gpua060:2854969:2855042 [1] NCCL INFO Channel 01/0 : 45[46000] -> 46[85000] via P2P/IPC/read
+gpua060:2854969:2855042 [1] NCCL INFO Connected all rings
+gpua060:2854969:2855042 [1] NCCL INFO Channel 01/0 : 45[46000] -> 52[7000] [send] via NET/IB/0
+gpua060:2854969:2855042 [1] NCCL INFO Channel 01/0 : 52[7000] -> 45[46000] [receive] via NET/IB/0
+gpua060:2854969:2855042 [1] NCCL INFO Channel 00/0 : 45[46000] -> 44[7000] via P2P/IPC/read
+gpua060:2854969:2855042 [1] NCCL INFO Channel 01/0 : 45[46000] -> 44[7000] via P2P/IPC/read
+gpua060:2854969:2855042 [1] NCCL INFO Connected all trees
+gpua060:2854969:2855042 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua060:2854969:2855042 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua060:2854969:2855042 [1] NCCL INFO comm 0x8c2cb6d0 rank 45 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE
+gpua005:322787:322787 [2] NCCL INFO cudaDriverVersion 12010
+gpua005:322787:322787 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.5<0>
+gpua005:322787:322787 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpua005:322787:322863 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.5<0>
+gpua005:322787:322863 [2] NCCL INFO Using network IB
+gpua005:322787:322863 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000
+gpua005:322787:322863 [2] NCCL INFO Trees [0] 7/-1/-1->6->5 [1] 7/-1/-1->6->5
+gpua005:322787:322863 [2] NCCL INFO Channel 00/0 : 6[85000] -> 7[c7000] via P2P/IPC/read
+gpua005:322787:322863 [2] NCCL INFO Channel 01/0 : 6[85000] -> 7[c7000] via P2P/IPC/read
+gpua005:322787:322863 [2] NCCL INFO Connected all rings
+gpua005:322787:322863 [2] NCCL INFO Channel 00/0 : 6[85000] -> 5[46000] via P2P/IPC/read
+gpua005:322787:322863 [2] NCCL INFO Channel 01/0 : 6[85000] -> 5[46000] via P2P/IPC/read
+gpua005:322787:322863 [2] NCCL INFO Connected all trees
+gpua005:322787:322863 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua005:322787:322863 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua005:322787:322863 [2] NCCL INFO comm 0xa671d450 rank 6 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE
+gpua005:322788:322788 [3] NCCL INFO cudaDriverVersion 12010
+gpua005:322788:322788 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.5<0>
+gpua005:322788:322788 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpua005:322788:322860 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.5<0>
+gpua005:322788:322860 [3] NCCL INFO Using network IB
+gpua005:322788:322860 [3] NCCL INFO Setting affinity for GPU 3 to ffff
+gpua005:322788:322860 [3] NCCL INFO Trees [0] -1/-1/-1->7->6 [1] -1/-1/-1->7->6
+gpua005:322788:322860 [3] NCCL INFO Channel 00/0 : 7[c7000] -> 8[7000] [send] via NET/IB/0
+gpua005:322788:322860 [3] NCCL INFO Channel 01/0 : 7[c7000] -> 8[7000] [send] via NET/IB/0
+gpua005:322788:322860 [3] NCCL INFO Connected all rings
+gpua005:322788:322860 [3] NCCL INFO Channel 00/0 : 7[c7000] -> 6[85000] via P2P/IPC/read
+gpua005:322788:322860 [3] NCCL INFO Channel 01/0 : 7[c7000] -> 6[85000] via P2P/IPC/read
+gpua005:322788:322860 [3] NCCL INFO Connected all trees
+gpua005:322788:322860 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua005:322788:322860 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua005:322788:322860 [3] NCCL INFO comm 0xb7586590 rank 7 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE
+gpua005:322785:322785 [0] NCCL INFO cudaDriverVersion 12010
+gpua005:322785:322785 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.5<0>
+gpua005:322785:322785 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpua005:322785:322861 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.5<0>
+gpua005:322785:322861 [0] NCCL INFO Using network IB
+gpua005:322785:322861 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000
+gpua005:322785:322861 [0] NCCL INFO Trees [0] 5/-1/-1->4->9 [1] 5/0/-1->4->12
+gpua005:322785:322861 [0] NCCL INFO Channel 00/0 : 3[c7000] -> 4[7000] [receive] via NET/IB/0
+gpua005:322785:322861 [0] NCCL INFO Channel 01/0 : 3[c7000] -> 4[7000] [receive] via NET/IB/0
+gpua005:322785:322861 [0] NCCL INFO Channel 00/0 : 4[7000] -> 5[46000] via P2P/IPC/read
+gpua005:322785:322861 [0] NCCL INFO Channel 01/0 : 4[7000] -> 5[46000] via P2P/IPC/read
+gpua005:322785:322861 [0] NCCL INFO Connected all rings
+gpua005:322785:322861 [0] NCCL INFO Channel 01/0 : 0[7000] -> 4[7000] [receive] via NET/IB/0
+gpua005:322785:322861 [0] NCCL INFO Channel 00/0 : 4[7000] -> 9[46000] [send] via NET/IB/0
+gpua005:322785:322861 [0] NCCL INFO Channel 01/0 : 4[7000] -> 12[7000] [send] via NET/IB/0
+gpua005:322785:322861 [0] NCCL INFO Channel 01/0 : 12[7000] -> 4[7000] [receive] via NET/IB/0
+gpua005:322785:322861 [0] NCCL INFO Channel 00/0 : 9[46000] -> 4[7000] [receive] via NET/IB/0
+gpua005:322785:322861 [0] NCCL INFO Channel 01/0 : 4[7000] -> 0[7000] [send] via NET/IB/0
+gpua005:322785:322861 [0] NCCL INFO Connected all trees
+gpua005:322785:322861 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua005:322785:322861 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua005:322785:322861 [0] NCCL INFO comm 0xbdcfe00 rank 4 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE
+gpua005:322786:322786 [1] NCCL INFO cudaDriverVersion 12010
+gpua005:322786:322786 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.5<0>
+gpua005:322786:322786 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpua005:322786:322862 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.5<0>
+gpua005:322786:322862 [1] NCCL INFO Using network IB
+gpua005:322786:322862 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000
+gpua005:322786:322862 [1] NCCL INFO Trees [0] 6/-1/-1->5->4 [1] 6/8/-1->5->4
+gpua005:322786:322862 [1] NCCL INFO Channel 00/0 : 5[46000] -> 6[85000] via P2P/IPC/read
+gpua005:322786:322862 [1] NCCL INFO Channel 01/0 : 5[46000] -> 6[85000] via P2P/IPC/read
+gpua005:322786:322862 [1] NCCL INFO Connected all rings
+gpua005:322786:322862 [1] NCCL INFO Channel 01/0 : 5[46000] -> 8[7000] [send] via NET/IB/0
+gpua005:322786:322862 [1] NCCL INFO Channel 01/0 : 8[7000] -> 5[46000] [receive] via NET/IB/0
+gpua005:322786:322862 [1] NCCL INFO Channel 00/0 : 5[46000] -> 4[7000] via P2P/IPC/read
+gpua005:322786:322862 [1] NCCL INFO Channel 01/0 : 5[46000] -> 4[7000] via P2P/IPC/read
+gpua005:322786:322862 [1] NCCL INFO Connected all trees
+gpua005:322786:322862 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua005:322786:322862 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua005:322786:322862 [1] NCCL INFO comm 0x9e527b50 rank 5 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE
+gpua035:1685218:1685218 [2] NCCL INFO cudaDriverVersion 12010
+gpua035:1685218:1685218 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.35<0>
+gpua035:1685218:1685218 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpua035:1685218:1685292 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.35<0>
+gpua035:1685218:1685292 [2] NCCL INFO Using network IB
+gpua035:1685218:1685292 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000
+gpua035:1685218:1685292 [2] NCCL INFO Trees [0] 31/-1/-1->30->29 [1] 31/-1/-1->30->29
+gpua035:1685218:1685292 [2] NCCL INFO Channel 00/0 : 30[85000] -> 31[c7000] via P2P/IPC/read
+gpua035:1685218:1685292 [2] NCCL INFO Channel 01/0 : 30[85000] -> 31[c7000] via P2P/IPC/read
+gpua035:1685218:1685292 [2] NCCL INFO Connected all rings
+gpua035:1685218:1685292 [2] NCCL INFO Channel 00/0 : 30[85000] -> 29[46000] via P2P/IPC/read
+gpua035:1685218:1685292 [2] NCCL INFO Channel 01/0 : 30[85000] -> 29[46000] via P2P/IPC/read
+gpua035:1685218:1685292 [2] NCCL INFO Connected all trees
+gpua035:1685218:1685292 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua035:1685218:1685292 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua035:1685218:1685292 [2] NCCL INFO comm 0x5149e590 rank 30 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE
+gpua035:1685217:1685217 [1] NCCL INFO cudaDriverVersion 12010
+gpua035:1685217:1685217 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.35<0>
+gpua035:1685217:1685217 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpua035:1685217:1685295 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.35<0>
+gpua035:1685217:1685295 [1] NCCL INFO Using network IB
+gpua035:1685217:1685295 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000
+gpua035:1685217:1685295 [1] NCCL INFO Trees [0] 30/-1/-1->29->28 [1] 30/44/-1->29->28
+gpua035:1685217:1685295 [1] NCCL INFO Channel 00/0 : 29[46000] -> 30[85000] via P2P/IPC/read
+gpua035:1685217:1685295 [1] NCCL INFO Channel 01/0 : 29[46000] -> 30[85000] via P2P/IPC/read
+gpua035:1685217:1685295 [1] NCCL INFO Connected all rings
+gpua035:1685217:1685295 [1] NCCL INFO Channel 01/0 : 29[46000] -> 44[7000] [send] via NET/IB/0
+gpua035:1685217:1685295 [1] NCCL INFO Channel 01/0 : 44[7000] -> 29[46000] [receive] via NET/IB/0
+gpua035:1685217:1685295 [1] NCCL INFO Channel 00/0 : 29[46000] -> 28[7000] via P2P/IPC/read
+gpua035:1685217:1685295 [1] NCCL INFO Channel 01/0 : 29[46000] -> 28[7000] via P2P/IPC/read
+gpua035:1685217:1685295 [1] NCCL INFO Connected all trees
+gpua035:1685217:1685295 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua035:1685217:1685295 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua035:1685217:1685295 [1] NCCL INFO comm 0x94073350 rank 29 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE
+gpua035:1685219:1685219 [3] NCCL INFO cudaDriverVersion 12010
+gpua035:1685219:1685219 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.35<0>
+gpua035:1685219:1685219 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpua035:1685219:1685293 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.35<0>
+gpua035:1685219:1685293 [3] NCCL INFO Using network IB
+gpua035:1685219:1685293 [3] NCCL INFO Setting affinity for GPU 3 to ffff
+gpua035:1685219:1685293 [3] NCCL INFO Trees [0] -1/-1/-1->31->30 [1] -1/-1/-1->31->30
+gpua035:1685219:1685293 [3] NCCL INFO Channel 00/0 : 31[c7000] -> 32[7000] [send] via NET/IB/0
+gpua035:1685219:1685293 [3] NCCL INFO Channel 01/0 : 31[c7000] -> 32[7000] [send] via NET/IB/0
+gpua035:1685219:1685293 [3] NCCL INFO Connected all rings
+gpua035:1685219:1685293 [3] NCCL INFO Channel 00/0 : 31[c7000] -> 30[85000] via P2P/IPC/read
+gpua035:1685219:1685293 [3] NCCL INFO Channel 01/0 : 31[c7000] -> 30[85000] via P2P/IPC/read
+gpua035:1685219:1685293 [3] NCCL INFO Connected all trees
+gpua035:1685219:1685293 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua035:1685219:1685293 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua035:1685219:1685293 [3] NCCL INFO comm 0x9d08f8e0 rank 31 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE
+gpua035:1685216:1685216 [0] NCCL INFO cudaDriverVersion 12010
+gpua035:1685216:1685216 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.35<0>
+gpua035:1685216:1685216 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpua035:1685216:1685294 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.35<0>
+gpua035:1685216:1685294 [0] NCCL INFO Using network IB
+gpua035:1685216:1685294 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000
+gpua035:1685216:1685294 [0] NCCL INFO Trees [0] 29/-1/-1->28->24 [1] 29/12/-1->28->60
+gpua035:1685216:1685294 [0] NCCL INFO Channel 00/0 : 27[c7000] -> 28[7000] [receive] via NET/IB/0
+gpua035:1685216:1685294 [0] NCCL INFO Channel 01/0 : 27[c7000] -> 28[7000] [receive] via NET/IB/0
+gpua035:1685216:1685294 [0] NCCL INFO Channel 00/0 : 28[7000] -> 29[46000] via P2P/IPC/read
+gpua035:1685216:1685294 [0] NCCL INFO Channel 01/0 : 28[7000] -> 29[46000] via P2P/IPC/read
+gpua035:1685216:1685294 [0] NCCL INFO Connected all rings
+gpua035:1685216:1685294 [0] NCCL INFO Channel 00/0 : 24[7000] -> 28[7000] [receive] via NET/IB/0
+gpua035:1685216:1685294 [0] NCCL INFO Channel 01/0 : 12[7000] -> 28[7000] [receive] via NET/IB/0
+gpua035:1685216:1685294 [0] NCCL INFO Channel 01/0 : 60[7000] -> 28[7000] [receive] via NET/IB/0
+gpua035:1685216:1685294 [0] NCCL INFO Channel 01/0 : 28[7000] -> 60[7000] [send] via NET/IB/0
+gpua035:1685216:1685294 [0] NCCL INFO Channel 01/0 : 28[7000] -> 12[7000] [send] via NET/IB/0
+gpua035:1685216:1685294 [0] NCCL INFO Channel 00/0 : 28[7000] -> 24[7000] [send] via NET/IB/0
+gpua035:1685216:1685294 [0] NCCL INFO Connected all trees
+gpua035:1685216:1685294 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua035:1685216:1685294 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua035:1685216:1685294 [0] NCCL INFO comm 0x8b5a90d0 rank 28 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE
+[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator())
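Editor's note: the reducer.cpp warning above is printed once per DDP process (the job runs 64 ranks) and refers to the find_unused_parameters flag of torch.nn.parallel.DistributedDataParallel. A minimal sketch of the setup it points at, with illustrative names rather than ESPnet's actual wrapper code:

    import torch
    import torch.distributed as dist
    from torch.nn.parallel import DistributedDataParallel as DDP

    def wrap_model(model: torch.nn.Module, local_gpu: int) -> DDP:
        # NCCL is the backend whose rank/ring/tree setup is logged above;
        # init_method is illustrative (this job supplies its own file:// URL).
        dist.init_process_group(backend="nccl", init_method="env://")
        model = model.cuda(local_gpu)
        # find_unused_parameters=True makes DDP walk the autograd graph every
        # iteration looking for parameters that received no gradient; the
        # warning suggests passing False when all parameters are always used.
        return DDP(model, device_ids=[local_gpu], find_unused_parameters=False)

Whether the flag can really be disabled depends on the model: with flow control (e.g. the optional text_prev branch), some iterations may legitimately leave parameters unused, which is exactly the false-positive case the warning text mentions.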
+[gpua003:0/64] 2023-07-05 22:46:27,131 (trainer:732) INFO: 14epoch:train:1-100batch: iter_time=1.256, forward_time=0.181, loss_ctc=67.478, loss_att=50.061, acc=0.683, loss=55.286, backward_time=0.765, grad_norm=84.127, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.113, optim0_lr0=1.020e-04, train_time=5.776
+[gpua003:0/64] 2023-07-05 22:48:06,129 (trainer:732) INFO: 14epoch:train:101-200batch: iter_time=1.036e-04, forward_time=0.104, loss_ctc=76.315, loss_att=60.252, acc=0.659, loss=65.071, backward_time=0.747, grad_norm=106.131, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.110, optim0_lr0=1.020e-04, train_time=1.980
+[gpua003:0/64] 2023-07-05 22:49:44,933 (trainer:732) INFO: 14epoch:train:201-300batch: iter_time=1.112e-04, forward_time=0.104, loss_ctc=71.342, loss_att=53.820, acc=0.681, loss=59.077, backward_time=0.744, grad_norm=88.859, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.109, optim0_lr0=1.020e-04, train_time=1.976
+[gpua003:0/64] 2023-07-05 22:51:23,717 (trainer:732) INFO: 14epoch:train:301-400batch: iter_time=1.061e-04, forward_time=0.104, loss_ctc=74.278, loss_att=54.336, acc=0.672, loss=60.318, backward_time=0.744, grad_norm=83.344, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.110, optim0_lr0=1.019e-04, train_time=1.975
+[gpua003:0/64] 2023-07-05 22:53:02,383 (trainer:732) INFO: 14epoch:train:401-500batch: iter_time=9.902e-05, forward_time=0.104, loss_ctc=73.819, loss_att=59.568, acc=0.675, loss=63.843, backward_time=0.745, grad_norm=90.212, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.110, optim0_lr0=1.019e-04, train_time=1.973
+[gpua003:0/64] 2023-07-05 22:54:41,341 (trainer:732) INFO: 14epoch:train:501-600batch: iter_time=9.856e-05, forward_time=0.105, loss_ctc=67.201, loss_att=54.515, acc=0.666, loss=58.321, backward_time=0.746, grad_norm=85.901, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.110, optim0_lr0=1.018e-04, train_time=1.979
+[gpua003:0/64] 2023-07-05 22:56:20,255 (trainer:732) INFO: 14epoch:train:601-700batch: iter_time=9.906e-05, forward_time=0.105, loss_ctc=79.513, loss_att=65.577, acc=0.663, loss=69.758, backward_time=0.745, grad_norm=91.560, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.110, optim0_lr0=1.018e-04, train_time=1.978
+[gpua003:0/64] 2023-07-05 22:58:13,995 (trainer:732) INFO: 14epoch:train:701-800batch: iter_time=1.052e-04, forward_time=0.104, loss_ctc=86.261, loss_att=57.434, acc=0.687, loss=66.082, backward_time=0.756, grad_norm=111.598, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.110, optim0_lr0=1.017e-04, train_time=2.275
+[gpua003:0/64] 2023-07-05 22:59:03,798 (multiple_iter_factory:32) INFO: Building 1th iter-factory...
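Editor's note: in the (trainer:732) records above, the reported loss is consistent with a fixed interpolation of the CTC and attention objectives, loss = 0.3 * loss_ctc + 0.7 * loss_att (for the 1-100batch record: 0.3 * 67.478 + 0.7 * 50.061 = 55.286). The 0.3 CTC weight is inferred from these numbers, not read from the training config; a quick check:

    # Sanity-check the loss interpolation against the first record above.
    # The 0.3 CTC weight is an assumption recovered from the logged values.
    ctc_weight = 0.3
    loss_ctc, loss_att = 67.478, 50.061
    loss = ctc_weight * loss_ctc + (1.0 - ctc_weight) * loss_att
    print(f"{loss:.3f}")  # 55.286, matching loss= in the log line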
+[gpua003:0/64] 2023-07-05 22:59:22,487 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua003:0/64] 2023-07-05 22:59:25,999 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"}
+ preprocess: )
+[gpua003:0/64] 2023-07-05 22:59:26,000 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9,
+[gpua003:0/64] 2023-07-05 22:59:26,006 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpua003:0/64] 2023-07-05 23:04:47,145 (trainer:732) INFO: 14epoch:train:801-900batch: iter_time=1.366, forward_time=0.106, loss_ctc=79.493, loss_att=56.962, acc=0.683, loss=63.721, backward_time=0.769, grad_norm=96.755, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.110, optim0_lr0=1.017e-04, train_time=7.863
+[gpua003:0/64] 2023-07-05 23:06:27,057 (trainer:732) INFO: 14epoch:train:901-1000batch: iter_time=1.071e-04, forward_time=0.107, loss_ctc=77.587, loss_att=65.112, acc=0.669, loss=68.855, backward_time=0.749, grad_norm=96.509, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.110, optim0_lr0=1.017e-04, train_time=1.998
+[gpua003:0/64] 2023-07-05 23:08:06,608 (trainer:732) INFO: 14epoch:train:1001-1100batch: iter_time=1.219e-04, forward_time=0.108, loss_ctc=69.740, loss_att=52.983, acc=0.695, loss=58.010, backward_time=0.747, grad_norm=83.010, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.110, optim0_lr0=1.016e-04, train_time=1.991
+[gpua003:0/64] 2023-07-05 23:09:46,058 (trainer:732) INFO: 14epoch:train:1101-1200batch: iter_time=9.900e-05, forward_time=0.107, loss_ctc=71.265, loss_att=51.955, acc=0.681, loss=57.748, backward_time=0.747, grad_norm=86.780, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.110, optim0_lr0=1.016e-04, train_time=1.989
+[gpua003:0/64] 2023-07-05 23:11:25,939 (trainer:732) INFO: 14epoch:train:1201-1300batch: iter_time=1.090e-04, forward_time=0.107, loss_ctc=72.245, loss_att=58.430, acc=0.682, loss=62.574, backward_time=0.748, grad_norm=82.034, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.110, optim0_lr0=1.015e-04, train_time=1.997
+[gpua003:0/64] 2023-07-05 23:13:05,201 (trainer:732) INFO: 14epoch:train:1301-1400batch: iter_time=1.137e-04, forward_time=0.106, loss_ctc=68.613, loss_att=56.255, acc=0.673, loss=59.963, backward_time=0.746, grad_norm=90.540, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.110, optim0_lr0=1.015e-04, train_time=1.985
+[gpua003:0/64] 2023-07-05 23:14:44,608 (trainer:732) INFO: 14epoch:train:1401-1500batch: iter_time=1.156e-04, forward_time=0.107, loss_ctc=75.103, loss_att=62.936, acc=0.675, loss=66.586, backward_time=0.747, grad_norm=91.101, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.110, optim0_lr0=1.014e-04, train_time=1.988
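Editor's note: the speech entries in the [train] dataset blocks above are Kaldi-style script/archive pairs (a wav.scp split pointing into ark files). A minimal way to inspect one split outside the training loop, assuming the kaldiio package that ESPnet uses for kaldi_ark I/O and that the archived entries are waveforms, for which kaldiio yields (rate, array) pairs:

    import kaldiio

    # Lazy mapping from utterance id to (sample_rate, waveform ndarray);
    # nothing is read from the ark files until an entry is accessed.
    wavs = kaldiio.load_scp("exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9")
    utt_id = next(iter(wavs))
    rate, audio = wavs[utt_id]
    print(utt_id, rate, audio.shape)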
+[gpua003:0/64] 2023-07-05 23:16:23,886 (trainer:732) INFO: 14epoch:train:1501-1600batch: iter_time=1.146e-04, forward_time=0.107, loss_ctc=85.725, loss_att=59.152, acc=0.686, loss=67.124, backward_time=0.748, grad_norm=323.443, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.110, optim0_lr0=1.014e-04, train_time=1.985
+[gpua003:0/64] 2023-07-05 23:17:31,811 (multiple_iter_factory:32) INFO: Building 2th iter-factory...
+[gpua003:0/64] 2023-07-05 23:17:50,852 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua003:0/64] 2023-07-05 23:17:54,347 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"}
+ preprocess: )
+[gpua003:0/64] 2023-07-05 23:17:54,348 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10,
+[gpua003:0/64] 2023-07-05 23:17:54,354 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpua003:0/64] 2023-07-05 23:22:17,852 (trainer:732) INFO: 14epoch:train:1601-1700batch: iter_time=1.315, forward_time=0.107, loss_ctc=91.275, loss_att=63.464, acc=0.682, loss=71.807, backward_time=0.761, grad_norm=113.989, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.110, optim0_lr0=1.014e-04, train_time=7.079
+[gpua003:0/64] 2023-07-05 23:23:57,809 (trainer:732) INFO: 14epoch:train:1701-1800batch: iter_time=1.095e-04, forward_time=0.106, loss_ctc=65.126, loss_att=52.331, acc=0.668, loss=56.169, backward_time=0.746, grad_norm=83.364, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.110, optim0_lr0=1.013e-04, train_time=1.999
+[gpua003:0/64] 2023-07-05 23:25:37,561 (trainer:732) INFO: 14epoch:train:1801-1900batch: iter_time=1.136e-04, forward_time=0.105, loss_ctc=79.242, loss_att=61.858, acc=0.676, loss=67.073, backward_time=0.744, grad_norm=97.701, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.110, optim0_lr0=1.013e-04, train_time=1.995
+[gpua003:0/64] 2023-07-05 23:27:17,066 (trainer:732) INFO: 14epoch:train:1901-2000batch: iter_time=1.351e-04, forward_time=0.108, loss_ctc=65.033, loss_att=46.741, acc=0.696, loss=52.229, backward_time=0.748, grad_norm=72.732, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.110, optim0_lr0=1.012e-04, train_time=1.990
+[gpua003:0/64] 2023-07-05 23:28:56,259 (trainer:732) INFO: 14epoch:train:2001-2100batch: iter_time=1.376e-04, forward_time=0.107, loss_ctc=74.297, loss_att=55.693, acc=0.679, loss=61.274, backward_time=0.747, grad_norm=81.546, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.111, optim0_lr0=1.012e-04, train_time=1.984
+[gpua003:0/64] 2023-07-05 23:30:35,698 (trainer:732) INFO: 14epoch:train:2101-2200batch: iter_time=1.153e-04, forward_time=0.107, loss_ctc=69.547, loss_att=58.660, acc=0.668, loss=61.926, backward_time=0.747, grad_norm=98.590, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.110, optim0_lr0=1.012e-04, train_time=1.989
+[gpua003:0/64] 2023-07-05 23:32:14,941 (trainer:732) INFO: 14epoch:train:2201-2300batch: iter_time=1.191e-04, forward_time=0.105, loss_ctc=70.991, loss_att=61.134, acc=0.665, loss=64.091, backward_time=0.745, grad_norm=88.986, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.109, optim0_lr0=1.011e-04, train_time=1.985
loss=62.450, backward_time=0.744, grad_norm=94.472, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.109, optim0_lr0=1.011e-04, train_time=1.984 +[gpua003:0/64] 2023-07-05 23:35:33,588 (trainer:732) INFO: 14epoch:train:2401-2500batch: iter_time=1.069e-04, forward_time=0.106, loss_ctc=89.577, loss_att=66.586, acc=0.668, loss=73.483, backward_time=0.746, grad_norm=115.780, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.109, optim0_lr0=1.010e-04, train_time=1.989 +[gpua003:0/64] 2023-07-05 23:35:35,877 (multiple_iter_factory:32) INFO: Building 3th iter-factory... +[gpua003:0/64] 2023-07-05 23:35:54,848 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-05 23:35:58,368 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-05 23:35:58,368 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1, +[gpua003:0/64] 2023-07-05 23:35:58,374 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-05 23:42:19,890 (trainer:732) INFO: 14epoch:train:2501-2600batch: iter_time=1.276, forward_time=0.106, loss_ctc=65.318, loss_att=47.624, acc=0.706, loss=52.932, backward_time=0.758, grad_norm=76.861, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.110, optim0_lr0=1.010e-04, train_time=8.126 +[gpua003:0/64] 2023-07-05 23:44:00,174 (trainer:732) INFO: 14epoch:train:2601-2700batch: iter_time=1.018e-04, forward_time=0.106, loss_ctc=74.292, loss_att=60.131, acc=0.672, loss=64.379, backward_time=0.748, grad_norm=91.235, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.110, optim0_lr0=1.010e-04, train_time=2.005 +[gpua003:0/64] 2023-07-05 23:45:39,635 (trainer:732) INFO: 14epoch:train:2701-2800batch: iter_time=9.909e-05, forward_time=0.106, loss_ctc=70.460, loss_att=53.646, acc=0.696, loss=58.690, backward_time=0.747, grad_norm=76.382, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.110, optim0_lr0=1.009e-04, train_time=1.989 +[gpua003:0/64] 2023-07-05 23:47:18,986 (trainer:732) INFO: 14epoch:train:2801-2900batch: iter_time=9.455e-05, forward_time=0.107, loss_ctc=72.107, loss_att=52.607, acc=0.679, loss=58.457, backward_time=0.746, grad_norm=92.055, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.110, optim0_lr0=1.009e-04, train_time=1.987 +[gpua003:0/64] 2023-07-05 23:48:58,340 (trainer:732) INFO: 14epoch:train:2901-3000batch: iter_time=8.975e-05, forward_time=0.106, loss_ctc=71.947, loss_att=58.354, acc=0.687, loss=62.432, backward_time=0.747, grad_norm=87.978, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.110, optim0_lr0=1.008e-04, train_time=1.987 +[gpua003:0/64] 2023-07-05 23:50:37,583 (trainer:732) INFO: 14epoch:train:3001-3100batch: iter_time=1.174e-04, forward_time=0.106, loss_ctc=67.838, loss_att=54.060, acc=0.680, loss=58.194, backward_time=0.746, grad_norm=87.646, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.110, optim0_lr0=1.008e-04, train_time=1.985 +[gpua003:0/64] 2023-07-05 23:52:16,968 (trainer:732) 
INFO: 14epoch:train:3101-3200batch: iter_time=9.872e-05, forward_time=0.106, loss_ctc=78.444, loss_att=65.330, acc=0.674, loss=69.264, backward_time=0.747, grad_norm=89.372, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.110, optim0_lr0=1.007e-04, train_time=1.987 +[gpua003:0/64] 2023-07-05 23:53:56,336 (trainer:732) INFO: 14epoch:train:3201-3300batch: iter_time=1.046e-04, forward_time=0.106, loss_ctc=84.969, loss_att=57.606, acc=0.694, loss=65.815, backward_time=0.746, grad_norm=110.526, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.110, optim0_lr0=1.007e-04, train_time=1.987 +[gpua003:0/64] 2023-07-05 23:54:31,552 (multiple_iter_factory:32) INFO: Building 4th iter-factory... +[gpua003:0/64] 2023-07-05 23:54:50,698 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-05 23:54:54,271 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-05 23:54:54,271 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8, +[gpua003:0/64] 2023-07-05 23:54:54,277 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-05 23:59:43,333 (trainer:732) INFO: 14epoch:train:3301-3400batch: iter_time=1.303, forward_time=0.146, loss_ctc=74.612, loss_att=55.066, acc=0.684, loss=60.930, backward_time=0.760, grad_norm=98.484, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.111, optim0_lr0=1.007e-04, train_time=6.939 +[gpua003:0/64] 2023-07-06 00:01:23,508 (trainer:732) INFO: 14epoch:train:3401-3500batch: iter_time=9.903e-05, forward_time=0.106, loss_ctc=79.257, loss_att=64.273, acc=0.665, loss=68.768, backward_time=0.747, grad_norm=97.943, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.109, optim0_lr0=1.006e-04, train_time=2.004 +[gpua003:0/64] 2023-07-06 00:03:02,774 (trainer:732) INFO: 14epoch:train:3501-3600batch: iter_time=1.067e-04, forward_time=0.105, loss_ctc=69.162, loss_att=52.606, acc=0.690, loss=57.572, backward_time=0.744, grad_norm=91.125, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.109, optim0_lr0=1.006e-04, train_time=1.985 +[gpua003:0/64] 2023-07-06 00:04:42,112 (trainer:732) INFO: 14epoch:train:3601-3700batch: iter_time=1.060e-04, forward_time=0.105, loss_ctc=67.528, loss_att=48.483, acc=0.692, loss=54.197, backward_time=0.746, grad_norm=77.302, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.109, optim0_lr0=1.005e-04, train_time=1.987 +[gpua003:0/64] 2023-07-06 00:06:21,444 (trainer:732) INFO: 14epoch:train:3701-3800batch: iter_time=1.056e-04, forward_time=0.105, loss_ctc=70.824, loss_att=56.721, acc=0.686, loss=60.952, backward_time=0.745, grad_norm=88.267, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.110, optim0_lr0=1.005e-04, train_time=1.986 +[gpua003:0/64] 2023-07-06 00:08:00,672 (trainer:732) INFO: 14epoch:train:3801-3900batch: iter_time=1.154e-04, forward_time=0.106, loss_ctc=64.923, loss_att=53.550, acc=0.673, loss=56.962, backward_time=0.746, grad_norm=87.728, clip=100.000, 
loss_scale=7.037e+13, optim_step_time=0.110, optim0_lr0=1.005e-04, train_time=1.984 +[gpua003:0/64] 2023-07-06 00:09:40,070 (trainer:732) INFO: 14epoch:train:3901-4000batch: iter_time=1.270e-04, forward_time=0.106, loss_ctc=73.750, loss_att=61.788, acc=0.673, loss=65.377, backward_time=0.746, grad_norm=99.456, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.110, optim0_lr0=1.004e-04, train_time=1.988 +[gpua003:0/64] 2023-07-06 00:11:19,231 (trainer:732) INFO: 14epoch:train:4001-4100batch: iter_time=1.074e-04, forward_time=0.105, loss_ctc=83.056, loss_att=57.768, acc=0.689, loss=65.354, backward_time=0.744, grad_norm=118.212, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.109, optim0_lr0=1.004e-04, train_time=1.983 +[gpua003:0/64] 2023-07-06 00:12:39,831 (multiple_iter_factory:32) INFO: Building 5th iter-factory... +[gpua003:0/64] 2023-07-06 00:12:59,113 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-06 00:13:02,670 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-06 00:13:02,670 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4, +[gpua003:0/64] 2023-07-06 00:13:02,687 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-06 00:16:52,797 (trainer:732) INFO: 14epoch:train:4101-4200batch: iter_time=2.223, forward_time=0.105, loss_ctc=86.387, loss_att=61.491, acc=0.682, loss=68.960, backward_time=0.756, grad_norm=109.255, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.109, optim0_lr0=1.003e-04, train_time=6.671 +[gpua003:0/64] 2023-07-06 00:18:32,838 (trainer:732) INFO: 14epoch:train:4201-4300batch: iter_time=9.583e-05, forward_time=0.105, loss_ctc=66.113, loss_att=52.338, acc=0.676, loss=56.471, backward_time=0.749, grad_norm=87.547, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.110, optim0_lr0=1.003e-04, train_time=2.001 +[gpua003:0/64] 2023-07-06 00:20:12,105 (trainer:732) INFO: 14epoch:train:4301-4400batch: iter_time=9.179e-05, forward_time=0.105, loss_ctc=74.526, loss_att=56.969, acc=0.689, loss=62.236, backward_time=0.745, grad_norm=94.528, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.110, optim0_lr0=1.003e-04, train_time=1.985 +[gpua003:0/64] 2023-07-06 00:21:52,782 (trainer:732) INFO: 14epoch:train:4401-4500batch: iter_time=1.001e-04, forward_time=0.105, loss_ctc=67.853, loss_att=50.469, acc=0.683, loss=55.684, backward_time=0.746, grad_norm=71.313, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.109, optim0_lr0=1.002e-04, train_time=2.013 +[gpua003:0/64] 2023-07-06 00:23:34,997 (trainer:732) INFO: 14epoch:train:4501-4600batch: iter_time=1.021e-04, forward_time=0.105, loss_ctc=70.434, loss_att=50.423, acc=0.692, loss=56.426, backward_time=0.747, grad_norm=81.773, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.109, optim0_lr0=1.002e-04, train_time=2.044 +[gpua003:0/64] 2023-07-06 00:25:14,224 (trainer:732) INFO: 14epoch:train:4601-4700batch: iter_time=9.233e-05, 
forward_time=0.104, loss_ctc=72.767, loss_att=60.956, acc=0.675, loss=64.499, backward_time=0.745, grad_norm=91.196, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.109, optim0_lr0=1.001e-04, train_time=1.984 +[gpua003:0/64] 2023-07-06 00:26:53,508 (trainer:732) INFO: 14epoch:train:4701-4800batch: iter_time=9.383e-05, forward_time=0.105, loss_ctc=67.376, loss_att=54.770, acc=0.670, loss=58.552, backward_time=0.746, grad_norm=96.216, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.110, optim0_lr0=1.001e-04, train_time=1.985 +[gpua003:0/64] 2023-07-06 00:28:32,809 (trainer:732) INFO: 14epoch:train:4801-4900batch: iter_time=1.024e-04, forward_time=0.105, loss_ctc=78.619, loss_att=59.814, acc=0.681, loss=65.456, backward_time=0.745, grad_norm=92.282, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.109, optim0_lr0=1.001e-04, train_time=1.986 +[gpua003:0/64] 2023-07-06 00:30:12,054 (trainer:732) INFO: 14epoch:train:4901-5000batch: iter_time=1.019e-04, forward_time=0.106, loss_ctc=85.585, loss_att=63.122, acc=0.681, loss=69.861, backward_time=0.744, grad_norm=103.405, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.110, optim0_lr0=1.000e-04, train_time=1.985 +[gpua003:0/64] 2023-07-06 00:30:14,272 (multiple_iter_factory:32) INFO: Building 6th iter-factory... +[gpua003:0/64] 2023-07-06 00:30:33,230 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-06 00:30:36,750 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-06 00:30:36,750 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7, +[gpua003:0/64] 2023-07-06 00:30:36,757 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-06 00:35:36,135 (trainer:732) INFO: 14epoch:train:5001-5100batch: iter_time=1.279, forward_time=0.105, loss_ctc=64.965, loss_att=47.311, acc=0.710, loss=52.607, backward_time=0.755, grad_norm=80.711, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.109, optim0_lr0=9.998e-05, train_time=6.481 +[gpua003:0/64] 2023-07-06 00:37:16,313 (trainer:732) INFO: 14epoch:train:5101-5200batch: iter_time=1.019e-04, forward_time=0.105, loss_ctc=71.440, loss_att=58.275, acc=0.679, loss=62.225, backward_time=0.746, grad_norm=92.300, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.109, optim0_lr0=9.994e-05, train_time=2.003 +[gpua003:0/64] 2023-07-06 00:38:55,753 (trainer:732) INFO: 14epoch:train:5201-5300batch: iter_time=9.692e-05, forward_time=0.106, loss_ctc=68.311, loss_att=51.470, acc=0.704, loss=56.522, backward_time=0.746, grad_norm=109.860, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.109, optim0_lr0=9.990e-05, train_time=1.989 +[gpua003:0/64] 2023-07-06 00:40:35,087 (trainer:732) INFO: 14epoch:train:5301-5400batch: iter_time=9.289e-05, forward_time=0.106, loss_ctc=71.699, loss_att=52.171, acc=0.683, loss=58.029, backward_time=0.746, grad_norm=87.463, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.110, optim0_lr0=9.986e-05, 
train_time=1.986 +[gpua003:0/64] 2023-07-06 00:42:14,290 (trainer:732) INFO: 14epoch:train:5401-5500batch: iter_time=1.061e-04, forward_time=0.105, loss_ctc=71.095, loss_att=58.813, acc=0.688, loss=62.498, backward_time=0.744, grad_norm=80.801, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.109, optim0_lr0=9.982e-05, train_time=1.984 +[gpua003:0/64] 2023-07-06 00:43:53,453 (trainer:732) INFO: 14epoch:train:5501-5600batch: iter_time=1.011e-04, forward_time=0.105, loss_ctc=65.967, loss_att=52.829, acc=0.683, loss=56.770, backward_time=0.743, grad_norm=80.073, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.109, optim0_lr0=9.978e-05, train_time=1.983 +[gpua003:0/64] 2023-07-06 00:45:32,694 (trainer:732) INFO: 14epoch:train:5601-5700batch: iter_time=9.394e-05, forward_time=0.105, loss_ctc=76.086, loss_att=65.998, acc=0.677, loss=69.024, backward_time=0.745, grad_norm=84.296, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.109, optim0_lr0=9.974e-05, train_time=1.985 +[gpua003:0/64] 2023-07-06 00:47:11,756 (trainer:732) INFO: 14epoch:train:5701-5800batch: iter_time=1.005e-04, forward_time=0.104, loss_ctc=80.981, loss_att=57.081, acc=0.692, loss=64.251, backward_time=0.745, grad_norm=117.230, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.110, optim0_lr0=9.970e-05, train_time=1.981 +[gpua003:0/64] 2023-07-06 00:47:46,785 (multiple_iter_factory:32) INFO: Building 7th iter-factory... +[gpua003:0/64] 2023-07-06 00:48:05,668 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-06 00:48:09,217 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-06 00:48:09,217 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5, +[gpua003:0/64] 2023-07-06 00:48:09,223 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-06 00:51:56,399 (trainer:732) INFO: 14epoch:train:5801-5900batch: iter_time=1.268, forward_time=0.105, loss_ctc=70.851, loss_att=51.948, acc=0.699, loss=57.619, backward_time=0.758, grad_norm=95.312, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.110, optim0_lr0=9.966e-05, train_time=5.693 +[gpua003:0/64] 2023-07-06 00:53:36,326 (trainer:732) INFO: 14epoch:train:5901-6000batch: iter_time=9.412e-05, forward_time=0.105, loss_ctc=74.049, loss_att=61.596, acc=0.687, loss=65.332, backward_time=0.745, grad_norm=83.262, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.109, optim0_lr0=9.962e-05, train_time=1.998 +[gpua003:0/64] 2023-07-06 00:55:15,556 (trainer:732) INFO: 14epoch:train:6001-6100batch: iter_time=9.442e-05, forward_time=0.105, loss_ctc=72.845, loss_att=54.112, acc=0.692, loss=59.732, backward_time=0.744, grad_norm=84.620, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.109, optim0_lr0=9.958e-05, train_time=1.984 +[gpua003:0/64] 2023-07-06 00:56:54,727 (trainer:732) INFO: 14epoch:train:6101-6200batch: iter_time=9.749e-05, forward_time=0.106, loss_ctc=62.931, loss_att=46.045, acc=0.705, loss=51.111, 
backward_time=0.744, grad_norm=75.046, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.109, optim0_lr0=9.954e-05, train_time=1.983 +[gpua003:0/64] 2023-07-06 00:58:34,088 (trainer:732) INFO: 14epoch:train:6201-6300batch: iter_time=9.675e-05, forward_time=0.106, loss_ctc=75.966, loss_att=57.343, acc=0.689, loss=62.930, backward_time=0.745, grad_norm=93.914, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.109, optim0_lr0=9.950e-05, train_time=1.987 +[gpua003:0/64] 2023-07-06 01:00:28,991 (trainer:732) INFO: 14epoch:train:6301-6400batch: iter_time=9.513e-05, forward_time=0.105, loss_ctc=65.892, loss_att=55.960, acc=0.686, loss=58.940, backward_time=0.769, grad_norm=98.285, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.109, optim0_lr0=9.946e-05, train_time=2.298 +[gpua003:0/64] 2023-07-06 01:02:08,399 (trainer:732) INFO: 14epoch:train:6401-6500batch: iter_time=9.649e-05, forward_time=0.106, loss_ctc=71.103, loss_att=59.408, acc=0.685, loss=62.916, backward_time=0.746, grad_norm=81.143, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.110, optim0_lr0=9.942e-05, train_time=1.988 +[gpua003:0/64] 2023-07-06 01:03:51,462 (trainer:732) INFO: 14epoch:train:6501-6600batch: iter_time=1.032e-04, forward_time=0.106, loss_ctc=81.907, loss_att=58.088, acc=0.700, loss=65.234, backward_time=0.749, grad_norm=98.608, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.110, optim0_lr0=9.938e-05, train_time=2.061 +[gpua003:0/64] 2023-07-06 01:04:59,329 (multiple_iter_factory:32) INFO: Building 8th iter-factory... +[gpua003:0/64] 2023-07-06 01:05:18,889 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-06 01:05:22,372 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-06 01:05:22,373 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2, +[gpua003:0/64] 2023-07-06 01:05:22,379 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-06 01:10:37,392 (trainer:732) INFO: 14epoch:train:6601-6700batch: iter_time=1.280, forward_time=0.107, loss_ctc=81.131, loss_att=58.478, acc=0.688, loss=65.274, backward_time=0.755, grad_norm=114.707, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.110, optim0_lr0=9.935e-05, train_time=8.118 +[gpua003:0/64] 2023-07-06 01:12:18,338 (trainer:732) INFO: 14epoch:train:6701-6800batch: iter_time=1.129e-04, forward_time=0.106, loss_ctc=63.589, loss_att=52.978, acc=0.681, loss=56.162, backward_time=0.750, grad_norm=83.786, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.110, optim0_lr0=9.931e-05, train_time=2.019 +[gpua003:0/64] 2023-07-06 01:13:57,605 (trainer:732) INFO: 14epoch:train:6801-6900batch: iter_time=1.194e-04, forward_time=0.106, loss_ctc=73.216, loss_att=56.431, acc=0.697, loss=61.466, backward_time=0.746, grad_norm=85.113, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.110, optim0_lr0=9.927e-05, train_time=1.985 +[gpua003:0/64] 2023-07-06 01:15:36,756 (trainer:732) INFO: 
14epoch:train:6901-7000batch: iter_time=1.192e-04, forward_time=0.106, loss_ctc=66.478, loss_att=48.895, acc=0.692, loss=54.170, backward_time=0.745, grad_norm=88.642, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.110, optim0_lr0=9.923e-05, train_time=1.983 +[gpua003:0/64] 2023-07-06 01:17:15,887 (trainer:732) INFO: 14epoch:train:7001-7100batch: iter_time=1.179e-04, forward_time=0.106, loss_ctc=70.266, loss_att=50.382, acc=0.693, loss=56.347, backward_time=0.746, grad_norm=84.071, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.110, optim0_lr0=9.919e-05, train_time=1.982 +[gpua003:0/64] 2023-07-06 01:18:54,896 (trainer:732) INFO: 14epoch:train:7101-7200batch: iter_time=1.187e-04, forward_time=0.105, loss_ctc=73.012, loss_att=61.557, acc=0.673, loss=64.994, backward_time=0.746, grad_norm=89.483, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.110, optim0_lr0=9.915e-05, train_time=1.980 +[gpua003:0/64] 2023-07-06 01:20:34,275 (trainer:732) INFO: 14epoch:train:7201-7300batch: iter_time=1.036e-04, forward_time=0.106, loss_ctc=65.363, loss_att=55.629, acc=0.669, loss=58.549, backward_time=0.746, grad_norm=89.409, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.109, optim0_lr0=9.911e-05, train_time=1.987 +[gpua003:0/64] 2023-07-06 01:22:13,430 (trainer:732) INFO: 14epoch:train:7301-7400batch: iter_time=1.061e-04, forward_time=0.105, loss_ctc=78.086, loss_att=58.693, acc=0.692, loss=64.511, backward_time=0.744, grad_norm=93.347, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.109, optim0_lr0=9.907e-05, train_time=1.983 +[gpua003:0/64] 2023-07-06 01:23:52,443 (trainer:732) INFO: 14epoch:train:7401-7500batch: iter_time=1.255e-04, forward_time=0.105, loss_ctc=85.073, loss_att=62.105, acc=0.679, loss=68.995, backward_time=0.745, grad_norm=99.476, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.110, optim0_lr0=9.903e-05, train_time=1.980 +[gpua003:0/64] 2023-07-06 01:23:53,863 (multiple_iter_factory:32) INFO: Building 9th iter-factory... 
+[gpua003:0/64] 2023-07-06 01:24:12,946 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-06 01:24:16,467 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-06 01:24:16,467 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11, +[gpua003:0/64] 2023-07-06 01:24:16,474 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-06 01:28:56,627 (trainer:732) INFO: 14epoch:train:7501-7600batch: iter_time=1.287, forward_time=0.106, loss_ctc=68.678, loss_att=50.359, acc=0.697, loss=55.855, backward_time=0.755, grad_norm=80.423, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.110, optim0_lr0=9.899e-05, train_time=6.083 +[gpua003:0/64] 2023-07-06 01:30:36,072 (trainer:732) INFO: 14epoch:train:7601-7700batch: iter_time=1.040e-04, forward_time=0.106, loss_ctc=68.907, loss_att=57.070, acc=0.685, loss=60.621, backward_time=0.745, grad_norm=91.649, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.110, optim0_lr0=9.896e-05, train_time=1.989 +[gpua003:0/64] 2023-07-06 01:32:15,336 (trainer:732) INFO: 14epoch:train:7701-7800batch: iter_time=1.071e-04, forward_time=0.106, loss_ctc=70.050, loss_att=51.852, acc=0.696, loss=57.312, backward_time=0.744, grad_norm=83.173, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.109, optim0_lr0=9.892e-05, train_time=1.985 +[gpua003:0/64] 2023-07-06 01:33:54,551 (trainer:732) INFO: 14epoch:train:7801-7900batch: iter_time=9.941e-05, forward_time=0.106, loss_ctc=73.006, loss_att=51.646, acc=0.691, loss=58.054, backward_time=0.745, grad_norm=86.960, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.109, optim0_lr0=9.888e-05, train_time=1.984 +[gpua003:0/64] 2023-07-06 01:35:34,182 (trainer:732) INFO: 14epoch:train:7901-8000batch: iter_time=8.641e-05, forward_time=0.107, loss_ctc=70.408, loss_att=60.834, acc=0.686, loss=63.707, backward_time=0.748, grad_norm=102.961, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.110, optim0_lr0=9.884e-05, train_time=1.992 +[gpua003:0/64] 2023-07-06 01:37:13,489 (trainer:732) INFO: 14epoch:train:8001-8100batch: iter_time=1.062e-04, forward_time=0.107, loss_ctc=66.066, loss_att=55.633, acc=0.676, loss=58.763, backward_time=0.747, grad_norm=84.748, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.110, optim0_lr0=9.880e-05, train_time=1.986 +[gpua003:0/64] 2023-07-06 01:38:55,993 (trainer:732) INFO: 14epoch:train:8101-8200batch: iter_time=1.083e-04, forward_time=0.107, loss_ctc=77.571, loss_att=61.968, acc=0.687, loss=66.649, backward_time=0.748, grad_norm=91.196, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.110, optim0_lr0=9.876e-05, train_time=2.050 +[gpua003:0/64] 2023-07-06 01:40:35,417 (trainer:732) INFO: 14epoch:train:8201-8300batch: iter_time=9.520e-05, forward_time=0.107, loss_ctc=80.442, loss_att=55.920, acc=0.694, loss=63.276, backward_time=0.747, grad_norm=82.962, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.110, 
optim0_lr0=9.872e-05, train_time=1.988 +[gpua003:0/64] 2023-07-06 01:41:10,610 (multiple_iter_factory:32) INFO: Building 10th iter-factory... +[gpua003:0/64] 2023-07-06 01:41:29,651 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-06 01:41:33,072 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-06 01:41:33,072 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6, +[gpua003:0/64] 2023-07-06 01:41:33,078 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-06 01:46:45,702 (trainer:732) INFO: 14epoch:train:8301-8400batch: iter_time=1.290, forward_time=0.120, loss_ctc=72.247, loss_att=53.015, acc=0.698, loss=58.785, backward_time=0.761, grad_norm=82.758, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.113, optim0_lr0=9.869e-05, train_time=7.405 +[gpua003:0/64] 2023-07-06 01:48:26,447 (trainer:732) INFO: 14epoch:train:8401-8500batch: iter_time=1.170e-04, forward_time=0.105, loss_ctc=72.826, loss_att=61.567, acc=0.673, loss=64.945, backward_time=0.746, grad_norm=92.862, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.110, optim0_lr0=9.865e-05, train_time=2.015 +[gpua003:0/64] 2023-07-06 01:50:06,144 (trainer:732) INFO: 14epoch:train:8501-8600batch: iter_time=1.142e-04, forward_time=0.105, loss_ctc=71.755, loss_att=54.026, acc=0.689, loss=59.345, backward_time=0.745, grad_norm=84.483, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.109, optim0_lr0=9.861e-05, train_time=1.994 +[gpua003:0/64] 2023-07-06 01:51:45,693 (trainer:732) INFO: 14epoch:train:8601-8700batch: iter_time=1.203e-04, forward_time=0.106, loss_ctc=62.700, loss_att=45.802, acc=0.702, loss=50.872, backward_time=0.746, grad_norm=76.913, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.109, optim0_lr0=9.857e-05, train_time=1.991 +[gpua003:0/64] 2023-07-06 01:53:38,865 (trainer:732) INFO: 14epoch:train:8701-8800batch: iter_time=1.071e-04, forward_time=0.106, loss_ctc=74.653, loss_att=55.710, acc=0.688, loss=61.393, backward_time=0.773, grad_norm=129.792, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.110, optim0_lr0=9.853e-05, train_time=2.263 +[gpua003:0/64] 2023-07-06 01:55:18,104 (trainer:732) INFO: 14epoch:train:8801-8900batch: iter_time=1.105e-04, forward_time=0.106, loss_ctc=64.757, loss_att=56.246, acc=0.674, loss=58.799, backward_time=0.744, grad_norm=80.970, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.110, optim0_lr0=9.849e-05, train_time=1.985 +[gpua003:0/64] 2023-07-06 01:56:59,672 (trainer:732) INFO: 14epoch:train:8901-9000batch: iter_time=1.077e-04, forward_time=0.106, loss_ctc=70.334, loss_att=57.944, acc=0.678, loss=61.661, backward_time=0.751, grad_norm=83.367, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.109, optim0_lr0=9.846e-05, train_time=2.031 +[gpua003:0/64] 2023-07-06 01:58:38,903 (trainer:732) INFO: 14epoch:train:9001-9100batch: iter_time=1.082e-04, forward_time=0.106, loss_ctc=80.565, loss_att=57.072, 
acc=0.700, loss=64.119, backward_time=0.745, grad_norm=93.217, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.109, optim0_lr0=9.842e-05, train_time=1.984 +[gpua003:0/64] 2023-07-06 01:59:52,859 (multiple_iter_factory:32) INFO: Building 11th iter-factory... +[gpua003:0/64] 2023-07-06 02:00:11,825 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-06 02:00:15,305 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-06 02:00:15,305 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3, +[gpua003:0/64] 2023-07-06 02:00:15,311 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-06 02:04:51,398 (trainer:732) INFO: 14epoch:train:9101-9200batch: iter_time=1.321, forward_time=0.157, loss_ctc=81.944, loss_att=58.952, acc=0.689, loss=65.850, backward_time=0.767, grad_norm=105.225, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.114, optim0_lr0=9.838e-05, train_time=7.449 +[gpua003:0/64] 2023-07-06 02:06:31,077 (trainer:732) INFO: 14epoch:train:9201-9300batch: iter_time=1.023e-04, forward_time=0.105, loss_ctc=66.026, loss_att=53.515, acc=0.692, loss=57.268, backward_time=0.747, grad_norm=86.498, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.110, optim0_lr0=9.834e-05, train_time=1.994 +[gpua003:0/64] 2023-07-06 02:08:11,806 (trainer:732) INFO: 14epoch:train:9301-9400batch: iter_time=9.862e-05, forward_time=0.106, loss_ctc=73.964, loss_att=57.337, acc=0.700, loss=62.325, backward_time=0.745, grad_norm=85.275, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.110, optim0_lr0=9.830e-05, train_time=2.014 +[gpua003:0/64] 2023-07-06 02:09:51,859 (trainer:732) INFO: 14epoch:train:9401-9500batch: iter_time=1.055e-04, forward_time=0.106, loss_ctc=64.980, loss_att=48.354, acc=0.695, loss=53.342, backward_time=0.745, grad_norm=74.030, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.110, optim0_lr0=9.827e-05, train_time=2.001 +[gpua003:0/64] 2023-07-06 02:11:31,288 (trainer:732) INFO: 14epoch:train:9501-9600batch: iter_time=1.036e-04, forward_time=0.106, loss_ctc=70.565, loss_att=51.030, acc=0.695, loss=56.890, backward_time=0.744, grad_norm=84.506, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.110, optim0_lr0=9.823e-05, train_time=1.988 +[gpua003:0/64] 2023-07-06 02:13:10,467 (trainer:732) INFO: 14epoch:train:9601-9700batch: iter_time=1.057e-04, forward_time=0.106, loss_ctc=71.513, loss_att=60.661, acc=0.691, loss=63.916, backward_time=0.745, grad_norm=87.016, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.110, optim0_lr0=9.819e-05, train_time=1.983 +[gpua003:0/64] 2023-07-06 02:14:49,595 (trainer:732) INFO: 14epoch:train:9701-9800batch: iter_time=1.059e-04, forward_time=0.106, loss_ctc=65.538, loss_att=54.445, acc=0.682, loss=57.773, backward_time=0.744, grad_norm=94.902, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.110, optim0_lr0=9.815e-05, train_time=1.982 +[gpua003:0/64] 2023-07-06 02:16:28,940 
(trainer:732) INFO: 14epoch:train:9801-9900batch: iter_time=1.015e-04, forward_time=0.107, loss_ctc=77.709, loss_att=58.250, acc=0.701, loss=64.087, backward_time=0.745, grad_norm=110.894, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.109, optim0_lr0=9.811e-05, train_time=1.987 +[gpua003:0/64] 2023-07-06 02:18:08,171 (trainer:732) INFO: 14epoch:train:9901-10000batch: iter_time=9.460e-05, forward_time=0.106, loss_ctc=82.982, loss_att=61.283, acc=0.688, loss=67.793, backward_time=0.744, grad_norm=103.022, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.109, optim0_lr0=9.808e-05, train_time=1.984 +[gpua003:0/64] 2023-07-06 02:32:00,423 (trainer:338) INFO: 14epoch results: [train] iter_time=0.165, forward_time=0.108, loss_ctc=73.204, loss_att=56.354, acc=0.685, loss=61.409, backward_time=0.748, grad_norm=93.679, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.110, optim0_lr0=1.000e-04, train_time=2.598, time=3 hours, 36 minutes and 52.71 seconds, total_count=110000, gpu_max_cached_mem_GB=34.473, [valid] loss_ctc=52.779, cer_ctc=0.299, loss_att=43.314, acc=0.648, cer=0.406, wer=0.989, loss=46.153, time=7 minutes and 15.36 seconds, total_count=11638, gpu_max_cached_mem_GB=37.768, [att_plot] time=6 minutes and 14.08 seconds, total_count=0, gpu_max_cached_mem_GB=37.768 +[gpua003:0/64] 2023-07-06 02:32:18,801 (trainer:386) INFO: The best model has been updated: valid.acc, valid.total_count +[gpua003:0/64] 2023-07-06 02:32:18,908 (trainer:272) INFO: 15/100epoch started. Estimated time to finish: 1 week, 6 days and 18 hours +[gpua003:0/64] 2023-07-06 02:32:19,981 (multiple_iter_factory:32) INFO: Building 0th iter-factory... +[gpua003:0/64] 2023-07-06 02:32:39,156 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-06 02:32:42,632 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-06 02:32:42,633 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0, +[gpua003:0/64] 2023-07-06 02:32:42,673 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-06 02:39:26,949 (trainer:732) INFO: 15epoch:train:1-100batch: iter_time=3.199, forward_time=0.158, loss_ctc=82.906, loss_att=63.268, acc=0.672, loss=69.159, backward_time=0.766, grad_norm=96.452, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.118, optim0_lr0=9.804e-05, train_time=8.551 +[gpua003:0/64] 2023-07-06 02:41:12,530 (trainer:732) INFO: 15epoch:train:101-200batch: iter_time=1.083e-04, forward_time=0.110, loss_ctc=93.463, loss_att=58.148, acc=0.684, loss=68.742, backward_time=0.761, grad_norm=99.343, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.113, optim0_lr0=9.800e-05, train_time=2.112 +[gpua003:0/64] 2023-07-06 02:42:58,616 (trainer:732) INFO: 15epoch:train:201-300batch: iter_time=1.075e-04, forward_time=0.109, loss_ctc=72.287, loss_att=52.500, acc=0.677, loss=58.436, backward_time=0.756, grad_norm=100.014, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.113, 
optim0_lr0=9.796e-05, train_time=2.122 +[gpua003:0/64] 2023-07-06 02:44:49,824 (trainer:732) INFO: 15epoch:train:301-400batch: iter_time=1.050e-04, forward_time=0.108, loss_ctc=74.935, loss_att=63.168, acc=0.669, loss=66.698, backward_time=0.769, grad_norm=93.180, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.113, optim0_lr0=9.793e-05, train_time=2.224 +[gpua003:0/64] 2023-07-06 02:46:40,595 (trainer:732) INFO: 15epoch:train:401-500batch: iter_time=1.069e-04, forward_time=0.109, loss_ctc=84.885, loss_att=68.005, acc=0.654, loss=73.069, backward_time=0.761, grad_norm=120.148, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.114, optim0_lr0=9.789e-05, train_time=2.215 +[gpua003:0/64] 2023-07-06 02:48:27,207 (trainer:732) INFO: 15epoch:train:501-600batch: iter_time=1.120e-04, forward_time=0.116, loss_ctc=88.432, loss_att=68.337, acc=0.676, loss=74.366, backward_time=0.760, grad_norm=91.380, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.113, optim0_lr0=9.785e-05, train_time=2.132 +[gpua003:0/64] 2023-07-06 02:50:13,816 (trainer:732) INFO: 15epoch:train:601-700batch: iter_time=1.091e-04, forward_time=0.109, loss_ctc=76.229, loss_att=54.425, acc=0.696, loss=60.966, backward_time=0.767, grad_norm=83.791, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.113, optim0_lr0=9.781e-05, train_time=2.132 +[gpua003:0/64] 2023-07-06 02:52:11,799 (trainer:732) INFO: 15epoch:train:701-800batch: iter_time=1.037e-04, forward_time=0.120, loss_ctc=79.546, loss_att=62.351, acc=0.675, loss=67.509, backward_time=0.782, grad_norm=103.551, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.113, optim0_lr0=9.778e-05, train_time=2.359 +[gpua003:0/64] 2023-07-06 02:53:02,111 (multiple_iter_factory:32) INFO: Building 1th iter-factory... +[gpua003:0/64] 2023-07-06 02:53:21,354 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-06 02:53:24,837 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-06 02:53:24,837 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6, +[gpua003:0/64] 2023-07-06 02:53:24,887 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-06 02:58:05,256 (trainer:732) INFO: 15epoch:train:801-900batch: iter_time=1.628, forward_time=0.133, loss_ctc=82.093, loss_att=59.999, acc=0.678, loss=66.627, backward_time=0.769, grad_norm=104.738, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.113, optim0_lr0=9.774e-05, train_time=7.069 +[gpua003:0/64] 2023-07-06 02:59:46,184 (trainer:732) INFO: 15epoch:train:901-1000batch: iter_time=1.123e-04, forward_time=0.108, loss_ctc=77.407, loss_att=56.196, acc=0.685, loss=62.559, backward_time=0.752, grad_norm=95.582, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.112, optim0_lr0=9.770e-05, train_time=2.018 +[gpua003:0/64] 2023-07-06 03:01:26,322 (trainer:732) INFO: 15epoch:train:1001-1100batch: iter_time=9.835e-05, forward_time=0.107, loss_ctc=85.077, loss_att=54.745, acc=0.695, 
loss=63.845, backward_time=0.755, grad_norm=87.655, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.112, optim0_lr0=9.766e-05, train_time=2.003 +[gpua003:0/64] 2023-07-06 03:03:06,138 (trainer:732) INFO: 15epoch:train:1101-1200batch: iter_time=8.910e-05, forward_time=0.106, loss_ctc=71.804, loss_att=57.490, acc=0.664, loss=61.784, backward_time=0.752, grad_norm=96.520, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.112, optim0_lr0=9.763e-05, train_time=1.996 +[gpua003:0/64] 2023-07-06 03:04:46,029 (trainer:732) INFO: 15epoch:train:1201-1300batch: iter_time=1.024e-04, forward_time=0.107, loss_ctc=78.767, loss_att=64.261, acc=0.679, loss=68.613, backward_time=0.752, grad_norm=95.553, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.112, optim0_lr0=9.759e-05, train_time=1.998 +[gpua003:0/64] 2023-07-06 03:06:25,781 (trainer:732) INFO: 15epoch:train:1301-1400batch: iter_time=9.587e-05, forward_time=0.106, loss_ctc=89.123, loss_att=69.314, acc=0.678, loss=75.257, backward_time=0.752, grad_norm=99.925, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.112, optim0_lr0=9.755e-05, train_time=1.995 +[gpua003:0/64] 2023-07-06 03:08:05,655 (trainer:732) INFO: 15epoch:train:1401-1500batch: iter_time=9.323e-05, forward_time=0.106, loss_ctc=77.240, loss_att=56.417, acc=0.693, loss=62.664, backward_time=0.752, grad_norm=92.784, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.111, optim0_lr0=9.751e-05, train_time=1.997 +[gpua003:0/64] 2023-07-06 03:09:45,213 (trainer:732) INFO: 15epoch:train:1501-1600batch: iter_time=9.074e-05, forward_time=0.105, loss_ctc=76.823, loss_att=57.859, acc=0.677, loss=63.548, backward_time=0.751, grad_norm=99.313, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.112, optim0_lr0=9.748e-05, train_time=1.991 +[gpua003:0/64] 2023-07-06 03:10:52,700 (multiple_iter_factory:32) INFO: Building 2th iter-factory... 
+[gpua003:0/64] 2023-07-06 03:11:11,991 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-06 03:11:15,554 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-06 03:11:15,554 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4, +[gpua003:0/64] 2023-07-06 03:11:15,561 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-06 03:16:19,134 (trainer:732) INFO: 15epoch:train:1601-1700batch: iter_time=1.281, forward_time=0.107, loss_ctc=78.684, loss_att=60.055, acc=0.678, loss=65.643, backward_time=0.768, grad_norm=81.939, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.112, optim0_lr0=9.744e-05, train_time=7.878 +[gpua003:0/64] 2023-07-06 03:18:00,308 (trainer:732) INFO: 15epoch:train:1701-1800batch: iter_time=9.922e-05, forward_time=0.107, loss_ctc=79.008, loss_att=56.298, acc=0.696, loss=63.111, backward_time=0.753, grad_norm=81.521, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.111, optim0_lr0=9.740e-05, train_time=2.023 +[gpua003:0/64] 2023-07-06 03:19:40,539 (trainer:732) INFO: 15epoch:train:1801-1900batch: iter_time=9.836e-05, forward_time=0.108, loss_ctc=90.709, loss_att=57.184, acc=0.688, loss=67.242, backward_time=0.754, grad_norm=97.794, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.111, optim0_lr0=9.737e-05, train_time=2.004 +[gpua003:0/64] 2023-07-06 03:21:20,631 (trainer:732) INFO: 15epoch:train:1901-2000batch: iter_time=9.980e-05, forward_time=0.107, loss_ctc=71.072, loss_att=53.719, acc=0.682, loss=58.925, backward_time=0.751, grad_norm=83.439, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.111, optim0_lr0=9.733e-05, train_time=2.002 +[gpua003:0/64] 2023-07-06 03:23:00,483 (trainer:732) INFO: 15epoch:train:2001-2100batch: iter_time=9.705e-05, forward_time=0.107, loss_ctc=74.810, loss_att=63.359, acc=0.675, loss=66.794, backward_time=0.751, grad_norm=91.749, clip=100.000, loss_scale=5.629e+14, optim_step_time=0.111, optim0_lr0=9.729e-05, train_time=1.997 +[gpua003:0/64] 2023-07-06 03:24:40,225 (trainer:732) INFO: 15epoch:train:2101-2200batch: iter_time=9.846e-05, forward_time=0.107, loss_ctc=86.087, loss_att=63.950, acc=0.671, loss=70.591, backward_time=0.751, grad_norm=104.916, clip=100.000, loss_scale=5.629e+14, optim_step_time=0.111, optim0_lr0=9.726e-05, train_time=1.995 +[gpua003:0/64] 2023-07-06 03:26:19,997 (trainer:732) INFO: 15epoch:train:2201-2300batch: iter_time=1.004e-04, forward_time=0.107, loss_ctc=81.168, loss_att=65.843, acc=0.683, loss=70.440, backward_time=0.751, grad_norm=85.546, clip=100.000, loss_scale=5.629e+14, optim_step_time=0.111, optim0_lr0=9.722e-05, train_time=1.995 +[gpua003:0/64] 2023-07-06 03:28:19,638 (trainer:732) INFO: 15epoch:train:2301-2400batch: iter_time=9.845e-05, forward_time=0.106, loss_ctc=75.986, loss_att=55.773, acc=0.686, loss=61.837, backward_time=0.780, grad_norm=89.935, clip=100.000, loss_scale=5.629e+14, optim_step_time=0.111, optim0_lr0=9.718e-05, 
train_time=2.393 +[gpua003:0/64] 2023-07-06 03:30:11,150 (multiple_iter_factory:32) INFO: Building 3th iter-factory... +[gpua003:0/64] 2023-07-06 03:30:30,084 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-06 03:30:33,644 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-06 03:30:33,644 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7, +[gpua003:0/64] 2023-07-06 03:30:33,650 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-06 03:33:45,898 (trainer:732) INFO: 15epoch:train:2401-2500batch: iter_time=1.301, forward_time=0.142, loss_ctc=78.818, loss_att=56.765, acc=0.693, loss=63.381, backward_time=0.782, grad_norm=91.105, clip=100.000, loss_scale=5.629e+14, optim_step_time=0.113, optim0_lr0=9.715e-05, train_time=6.525 +[gpua003:0/64] 2023-07-06 03:35:27,576 (trainer:732) INFO: 15epoch:train:2501-2600batch: iter_time=1.159e-04, forward_time=0.110, loss_ctc=80.164, loss_att=60.607, acc=0.691, loss=66.474, backward_time=0.759, grad_norm=88.751, clip=100.000, loss_scale=5.629e+14, optim_step_time=0.112, optim0_lr0=9.711e-05, train_time=2.034 +[gpua003:0/64] 2023-07-06 03:37:07,711 (trainer:732) INFO: 15epoch:train:2601-2700batch: iter_time=1.071e-04, forward_time=0.108, loss_ctc=89.216, loss_att=56.175, acc=0.699, loss=66.087, backward_time=0.752, grad_norm=94.279, clip=100.000, loss_scale=5.629e+14, optim_step_time=0.112, optim0_lr0=9.707e-05, train_time=2.002 +[gpua003:0/64] 2023-07-06 03:38:47,535 (trainer:732) INFO: 15epoch:train:2701-2800batch: iter_time=1.003e-04, forward_time=0.107, loss_ctc=71.669, loss_att=52.232, acc=0.691, loss=58.063, backward_time=0.752, grad_norm=78.637, clip=100.000, loss_scale=5.629e+14, optim_step_time=0.112, optim0_lr0=9.704e-05, train_time=1.996 +[gpua003:0/64] 2023-07-06 03:40:27,453 (trainer:732) INFO: 15epoch:train:2801-2900batch: iter_time=9.136e-05, forward_time=0.108, loss_ctc=72.118, loss_att=61.501, acc=0.688, loss=64.686, backward_time=0.752, grad_norm=86.278, clip=100.000, loss_scale=5.629e+14, optim_step_time=0.112, optim0_lr0=9.700e-05, train_time=1.998 +[gpua003:0/64] 2023-07-06 03:42:07,650 (trainer:732) INFO: 15epoch:train:2901-3000batch: iter_time=9.709e-05, forward_time=0.108, loss_ctc=83.002, loss_att=63.713, acc=0.675, loss=69.500, backward_time=0.754, grad_norm=94.808, clip=100.000, loss_scale=5.629e+14, optim_step_time=0.112, optim0_lr0=9.696e-05, train_time=2.004 +[gpua003:0/64] 2023-07-06 03:43:47,676 (trainer:732) INFO: 15epoch:train:3001-3100batch: iter_time=9.954e-05, forward_time=0.108, loss_ctc=85.122, loss_att=64.702, acc=0.694, loss=70.828, backward_time=0.754, grad_norm=92.367, clip=100.000, loss_scale=5.629e+14, optim_step_time=0.112, optim0_lr0=9.693e-05, train_time=2.000 +[gpua003:0/64] 2023-07-06 03:45:30,195 (trainer:732) INFO: 15epoch:train:3101-3200batch: iter_time=1.035e-04, forward_time=0.108, loss_ctc=74.716, loss_att=52.707, acc=0.702, loss=59.310, 
backward_time=0.753, grad_norm=81.698, clip=100.000, loss_scale=5.629e+14, optim_step_time=0.113, optim0_lr0=9.689e-05, train_time=2.050 +[gpua003:0/64] 2023-07-06 03:47:13,089 (trainer:732) INFO: 15epoch:train:3201-3300batch: iter_time=9.444e-05, forward_time=0.108, loss_ctc=77.867, loss_att=61.783, acc=0.691, loss=66.608, backward_time=0.759, grad_norm=98.749, clip=100.000, loss_scale=5.629e+14, optim_step_time=0.112, optim0_lr0=9.685e-05, train_time=2.058 +[gpua003:0/64] 2023-07-06 03:47:53,832 (multiple_iter_factory:32) INFO: Building 4th iter-factory... +[gpua003:0/64] 2023-07-06 03:48:12,960 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-06 03:48:16,513 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-06 03:48:16,514 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3, +[gpua003:0/64] 2023-07-06 03:48:16,520 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-06 03:53:07,778 (trainer:732) INFO: 15epoch:train:3301-3400batch: iter_time=1.891, forward_time=0.108, loss_ctc=80.670, loss_att=59.551, acc=0.693, loss=65.887, backward_time=0.768, grad_norm=91.581, clip=100.000, loss_scale=5.629e+14, optim_step_time=0.112, optim0_lr0=9.682e-05, train_time=7.094 +[gpua003:0/64] 2023-07-06 03:54:48,045 (trainer:732) INFO: 15epoch:train:3401-3500batch: iter_time=1.099e-04, forward_time=0.109, loss_ctc=76.671, loss_att=54.588, acc=0.697, loss=61.213, backward_time=0.753, grad_norm=86.293, clip=100.000, loss_scale=5.629e+14, optim_step_time=0.112, optim0_lr0=9.678e-05, train_time=2.005 +[gpua003:0/64] 2023-07-06 03:56:29,514 (trainer:732) INFO: 15epoch:train:3501-3600batch: iter_time=9.121e-05, forward_time=0.108, loss_ctc=83.637, loss_att=55.309, acc=0.702, loss=63.808, backward_time=0.753, grad_norm=89.277, clip=100.000, loss_scale=5.629e+14, optim_step_time=0.112, optim0_lr0=9.674e-05, train_time=2.029 +[gpua003:0/64] 2023-07-06 03:58:09,348 (trainer:732) INFO: 15epoch:train:3601-3700batch: iter_time=9.560e-05, forward_time=0.108, loss_ctc=70.720, loss_att=55.269, acc=0.679, loss=59.904, backward_time=0.751, grad_norm=89.133, clip=100.000, loss_scale=5.629e+14, optim_step_time=0.112, optim0_lr0=9.671e-05, train_time=1.996 +[gpua003:0/64] 2023-07-06 03:59:49,035 (trainer:732) INFO: 15epoch:train:3701-3800batch: iter_time=9.637e-05, forward_time=0.107, loss_ctc=77.221, loss_att=62.310, acc=0.690, loss=66.783, backward_time=0.751, grad_norm=96.452, clip=100.000, loss_scale=5.629e+14, optim_step_time=0.111, optim0_lr0=9.667e-05, train_time=1.994 +[gpua003:0/64] 2023-07-06 04:01:31,489 (trainer:732) INFO: 15epoch:train:3801-3900batch: iter_time=1.006e-04, forward_time=0.107, loss_ctc=86.594, loss_att=65.894, acc=0.691, loss=72.104, backward_time=0.753, grad_norm=92.320, clip=100.000, loss_scale=5.629e+14, optim_step_time=0.111, optim0_lr0=9.664e-05, train_time=2.049 +[gpua003:0/64] 2023-07-06 04:03:11,208 (trainer:732) INFO: 
15epoch:train:3901-4000batch: iter_time=9.501e-05, forward_time=0.107, loss_ctc=78.169, loss_att=56.640, acc=0.693, loss=63.098, backward_time=0.750, grad_norm=89.150, clip=100.000, loss_scale=5.629e+14, optim_step_time=0.112, optim0_lr0=9.660e-05, train_time=1.994 +[gpua003:0/64] 2023-07-06 04:04:51,122 (trainer:732) INFO: 15epoch:train:4001-4100batch: iter_time=9.624e-05, forward_time=0.107, loss_ctc=76.858, loss_att=56.884, acc=0.686, loss=62.876, backward_time=0.751, grad_norm=103.211, clip=100.000, loss_scale=5.629e+14, optim_step_time=0.112, optim0_lr0=9.656e-05, train_time=1.998 +[gpua003:0/64] 2023-07-06 04:05:57,372 (multiple_iter_factory:32) INFO: Building 5th iter-factory... +[gpua003:0/64] 2023-07-06 04:06:16,333 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-06 04:06:19,864 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-06 04:06:19,864 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9, +[gpua003:0/64] 2023-07-06 04:06:19,870 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-06 04:09:19,124 (trainer:732) INFO: 15epoch:train:4101-4200batch: iter_time=1.298, forward_time=0.107, loss_ctc=76.656, loss_att=57.378, acc=0.699, loss=63.161, backward_time=0.761, grad_norm=84.164, clip=100.000, loss_scale=5.629e+14, optim_step_time=0.112, optim0_lr0=9.653e-05, train_time=5.360 +[gpua003:0/64] 2023-07-06 04:10:59,509 (trainer:732) INFO: 15epoch:train:4201-4300batch: iter_time=1.008e-04, forward_time=0.109, loss_ctc=77.716, loss_att=58.424, acc=0.694, loss=64.212, backward_time=0.754, grad_norm=86.815, clip=100.000, loss_scale=5.629e+14, optim_step_time=0.112, optim0_lr0=9.649e-05, train_time=2.007 +[gpua003:0/64] 2023-07-06 04:12:39,763 (trainer:732) INFO: 15epoch:train:4301-4400batch: iter_time=1.001e-04, forward_time=0.108, loss_ctc=86.970, loss_att=57.258, acc=0.694, loss=66.171, backward_time=0.754, grad_norm=89.527, clip=100.000, loss_scale=5.629e+14, optim_step_time=0.112, optim0_lr0=9.646e-05, train_time=2.005 +[gpua003:0/64] 2023-07-06 04:14:19,772 (trainer:732) INFO: 15epoch:train:4401-4500batch: iter_time=8.559e-05, forward_time=0.108, loss_ctc=70.095, loss_att=53.690, acc=0.681, loss=58.611, backward_time=0.754, grad_norm=78.614, clip=100.000, loss_scale=5.629e+14, optim_step_time=0.112, optim0_lr0=9.642e-05, train_time=2.000 +[gpua003:0/64] 2023-07-06 04:15:59,517 (trainer:732) INFO: 15epoch:train:4501-4600batch: iter_time=9.013e-05, forward_time=0.107, loss_ctc=74.563, loss_att=61.596, acc=0.680, loss=65.486, backward_time=0.751, grad_norm=89.324, clip=100.000, loss_scale=5.629e+14, optim_step_time=0.112, optim0_lr0=9.638e-05, train_time=1.995 +[gpua003:0/64] 2023-07-06 04:17:39,351 (trainer:732) INFO: 15epoch:train:4601-4700batch: iter_time=9.033e-05, forward_time=0.108, loss_ctc=83.925, loss_att=60.152, acc=0.693, loss=67.284, backward_time=0.752, grad_norm=90.456, clip=100.000, loss_scale=5.629e+14, 
optim_step_time=0.112, optim0_lr0=9.635e-05, train_time=1.996 +[gpua003:0/64] 2023-07-06 04:19:19,250 (trainer:732) INFO: 15epoch:train:4701-4800batch: iter_time=9.522e-05, forward_time=0.108, loss_ctc=78.761, loss_att=63.200, acc=0.690, loss=67.868, backward_time=0.753, grad_norm=86.126, clip=100.000, loss_scale=5.629e+14, optim_step_time=0.112, optim0_lr0=9.631e-05, train_time=1.998 +[gpua003:0/64] 2023-07-06 04:20:59,100 (trainer:732) INFO: 15epoch:train:4801-4900batch: iter_time=9.812e-05, forward_time=0.107, loss_ctc=74.851, loss_att=55.680, acc=0.694, loss=61.431, backward_time=0.752, grad_norm=103.100, clip=100.000, loss_scale=5.629e+14, optim_step_time=0.112, optim0_lr0=9.628e-05, train_time=1.997 +[gpua003:0/64] 2023-07-06 04:22:39,137 (multiple_iter_factory:32) INFO: Building 6th iter-factory... +[gpua003:0/64] 2023-07-06 04:22:58,402 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-06 04:23:01,981 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-06 04:23:01,981 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2, +[gpua003:0/64] 2023-07-06 04:23:01,987 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-06 04:26:15,035 (trainer:732) INFO: 15epoch:train:4901-5000batch: iter_time=1.298, forward_time=0.107, loss_ctc=79.639, loss_att=56.728, acc=0.693, loss=63.601, backward_time=0.757, grad_norm=91.688, clip=100.000, loss_scale=5.629e+14, optim_step_time=0.112, optim0_lr0=9.624e-05, train_time=6.318 +[gpua003:0/64] 2023-07-06 04:27:57,014 (trainer:732) INFO: 15epoch:train:5001-5100batch: iter_time=1.188e-04, forward_time=0.110, loss_ctc=79.481, loss_att=59.825, acc=0.685, loss=65.722, backward_time=0.758, grad_norm=89.848, clip=100.000, loss_scale=5.629e+14, optim_step_time=0.113, optim0_lr0=9.621e-05, train_time=2.039 +[gpua003:0/64] 2023-07-06 04:29:37,013 (trainer:732) INFO: 15epoch:train:5101-5200batch: iter_time=1.032e-04, forward_time=0.108, loss_ctc=86.315, loss_att=56.327, acc=0.690, loss=65.324, backward_time=0.752, grad_norm=105.057, clip=100.000, loss_scale=5.629e+14, optim_step_time=0.112, optim0_lr0=9.617e-05, train_time=2.000 +[gpua003:0/64] 2023-07-06 04:31:16,972 (trainer:732) INFO: 15epoch:train:5201-5300batch: iter_time=1.038e-04, forward_time=0.106, loss_ctc=70.861, loss_att=50.758, acc=0.689, loss=56.789, backward_time=0.752, grad_norm=87.606, clip=100.000, loss_scale=5.629e+14, optim_step_time=0.111, optim0_lr0=9.614e-05, train_time=1.999 +[gpua003:0/64] 2023-07-06 04:32:56,687 (trainer:732) INFO: 15epoch:train:5301-5400batch: iter_time=9.220e-05, forward_time=0.106, loss_ctc=71.546, loss_att=60.197, acc=0.685, loss=63.601, backward_time=0.750, grad_norm=91.661, clip=100.000, loss_scale=5.629e+14, optim_step_time=0.111, optim0_lr0=9.610e-05, train_time=1.994 +[gpua003:0/64] 2023-07-06 04:34:36,595 (trainer:732) INFO: 15epoch:train:5401-5500batch: iter_time=9.018e-05, forward_time=0.107, 
loss_ctc=82.998, loss_att=65.769, acc=0.666, loss=70.938, backward_time=0.752, grad_norm=109.581, clip=100.000, loss_scale=5.629e+14, optim_step_time=0.111, optim0_lr0=9.606e-05, train_time=1.998 +[gpua003:0/64] 2023-07-06 04:36:16,255 (trainer:732) INFO: 15epoch:train:5501-5600batch: iter_time=9.943e-05, forward_time=0.107, loss_ctc=82.860, loss_att=64.257, acc=0.691, loss=69.838, backward_time=0.750, grad_norm=91.728, clip=100.000, loss_scale=5.629e+14, optim_step_time=0.111, optim0_lr0=9.603e-05, train_time=1.993 +[gpua003:0/64] 2023-07-06 04:37:55,929 (trainer:732) INFO: 15epoch:train:5601-5700batch: iter_time=9.638e-05, forward_time=0.106, loss_ctc=74.731, loss_att=53.015, acc=0.702, loss=59.530, backward_time=0.751, grad_norm=90.537, clip=100.000, loss_scale=5.629e+14, optim_step_time=0.111, optim0_lr0=9.599e-05, train_time=1.993 +[gpua003:0/64] 2023-07-06 04:39:35,714 (trainer:732) INFO: 15epoch:train:5701-5800batch: iter_time=9.332e-05, forward_time=0.107, loss_ctc=76.746, loss_att=60.403, acc=0.686, loss=65.306, backward_time=0.751, grad_norm=100.897, clip=100.000, loss_scale=5.629e+14, optim_step_time=0.112, optim0_lr0=9.596e-05, train_time=1.995 +[gpua003:0/64] 2023-07-06 04:40:08,943 (multiple_iter_factory:32) INFO: Building 7th iter-factory... +[gpua003:0/64] 2023-07-06 04:40:28,479 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-06 04:40:32,043 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-06 04:40:32,043 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10, +[gpua003:0/64] 2023-07-06 04:40:32,049 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-06 04:44:18,911 (trainer:732) INFO: 15epoch:train:5801-5900batch: iter_time=1.331, forward_time=0.108, loss_ctc=75.757, loss_att=54.746, acc=0.692, loss=61.049, backward_time=0.767, grad_norm=90.029, clip=100.000, loss_scale=5.629e+14, optim_step_time=0.112, optim0_lr0=9.592e-05, train_time=5.664 +[gpua003:0/64] 2023-07-06 04:45:59,455 (trainer:732) INFO: 15epoch:train:5901-6000batch: iter_time=1.005e-04, forward_time=0.107, loss_ctc=80.541, loss_att=59.034, acc=0.691, loss=65.486, backward_time=0.753, grad_norm=101.034, clip=100.000, loss_scale=5.629e+14, optim_step_time=0.112, optim0_lr0=9.589e-05, train_time=2.011 +[gpua003:0/64] 2023-07-06 04:47:39,684 (trainer:732) INFO: 15epoch:train:6001-6100batch: iter_time=9.840e-05, forward_time=0.107, loss_ctc=77.216, loss_att=50.560, acc=0.689, loss=58.557, backward_time=0.750, grad_norm=91.962, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.112, optim0_lr0=9.585e-05, train_time=2.004 +[gpua003:0/64] 2023-07-06 04:49:19,460 (trainer:732) INFO: 15epoch:train:6101-6200batch: iter_time=9.863e-05, forward_time=0.108, loss_ctc=70.142, loss_att=56.782, acc=0.679, loss=60.790, backward_time=0.751, grad_norm=87.925, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.112, optim0_lr0=9.582e-05, train_time=1.995 
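# Annotation: the loss_scale column just doubled -- entries up to 04:46 show
# 5.629e+14 and the 04:47:39 entry onwards shows 1.126e+15; later stretches of
# this log reach 2.252e+15, 4.504e+15 and 9.007e+15. Every value is an exact
# power of two (2**49 .. 2**53), which is consistent with dynamic loss scaling
# in mixed-precision training, e.g. torch.cuda.amp.GradScaler (defaults
# growth_factor=2.0, backoff_factor=0.5, growth_interval=2000). The scaler
# actually used is not echoed in this log, so the sketch below illustrates
# that update rule under those assumptions; it is not code from this run.
import math

for s in (5.629e14, 1.126e15, 2.252e15, 4.504e15, 9.007e15):
    print(f"{s:.3e} ~= 2**{round(math.log2(s))}")   # 49, 50, 51, 52, 53

def update_scale(scale, found_inf, streak, growth_interval=2000):
    """GradScaler-style rule: halve on overflow, double after a clean streak."""
    if found_inf:                  # inf/nan gradients: back off and reset
        return scale * 0.5, 0
    streak += 1
    if streak == growth_interval:  # long overflow-free streak: grow the scale
        return scale * 2.0, 0      # -> the doublings seen in this log
    return scale, streak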
+[gpua003:0/64] 2023-07-06 04:50:59,293 (trainer:732) INFO: 15epoch:train:6201-6300batch: iter_time=9.683e-05, forward_time=0.107, loss_ctc=84.691, loss_att=67.527, acc=0.672, loss=72.676, backward_time=0.751, grad_norm=96.501, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.112, optim0_lr0=9.578e-05, train_time=1.996 +[gpua003:0/64] 2023-07-06 04:52:39,001 (trainer:732) INFO: 15epoch:train:6301-6400batch: iter_time=9.518e-05, forward_time=0.108, loss_ctc=78.908, loss_att=59.469, acc=0.696, loss=65.301, backward_time=0.751, grad_norm=90.460, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.112, optim0_lr0=9.575e-05, train_time=1.994 +[gpua003:0/64] 2023-07-06 04:54:18,553 (trainer:732) INFO: 15epoch:train:6401-6500batch: iter_time=1.032e-04, forward_time=0.108, loss_ctc=79.232, loss_att=59.510, acc=0.686, loss=65.427, backward_time=0.749, grad_norm=104.175, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.112, optim0_lr0=9.571e-05, train_time=1.991 +[gpua003:0/64] 2023-07-06 04:55:58,195 (trainer:732) INFO: 15epoch:train:6501-6600batch: iter_time=9.919e-05, forward_time=0.108, loss_ctc=74.355, loss_att=57.326, acc=0.685, loss=62.435, backward_time=0.750, grad_norm=89.442, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.112, optim0_lr0=9.568e-05, train_time=1.993 +[gpua003:0/64] 2023-07-06 04:57:05,281 (multiple_iter_factory:32) INFO: Building 8th iter-factory... +[gpua003:0/64] 2023-07-06 04:57:24,341 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-06 04:57:27,875 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-06 04:57:27,875 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1, +[gpua003:0/64] 2023-07-06 04:57:27,881 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-06 05:03:01,395 (trainer:732) INFO: 15epoch:train:6601-6700batch: iter_time=1.280, forward_time=0.108, loss_ctc=78.273, loss_att=57.077, acc=0.685, loss=63.436, backward_time=0.759, grad_norm=85.784, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.112, optim0_lr0=9.564e-05, train_time=8.464 +[gpua003:0/64] 2023-07-06 05:04:42,396 (trainer:732) INFO: 15epoch:train:6701-6800batch: iter_time=1.146e-04, forward_time=0.110, loss_ctc=75.552, loss_att=54.344, acc=0.708, loss=60.707, backward_time=0.754, grad_norm=87.661, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.113, optim0_lr0=9.561e-05, train_time=2.020 +[gpua003:0/64] 2023-07-06 05:06:22,783 (trainer:732) INFO: 15epoch:train:6801-6900batch: iter_time=1.151e-04, forward_time=0.109, loss_ctc=87.760, loss_att=56.616, acc=0.702, loss=65.959, backward_time=0.754, grad_norm=109.179, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.112, optim0_lr0=9.557e-05, train_time=2.008 +[gpua003:0/64] 2023-07-06 05:08:02,835 (trainer:732) INFO: 15epoch:train:6901-7000batch: iter_time=1.119e-04, forward_time=0.110, loss_ctc=68.674, loss_att=52.588, acc=0.694, loss=57.414, 
backward_time=0.753, grad_norm=86.799, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.112, optim0_lr0=9.554e-05, train_time=2.001 +[gpua003:0/64] 2023-07-06 05:09:42,653 (trainer:732) INFO: 15epoch:train:7001-7100batch: iter_time=1.118e-04, forward_time=0.109, loss_ctc=73.621, loss_att=61.316, acc=0.689, loss=65.008, backward_time=0.752, grad_norm=105.693, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.112, optim0_lr0=9.550e-05, train_time=1.996 +[gpua003:0/64] 2023-07-06 05:11:22,495 (trainer:732) INFO: 15epoch:train:7101-7200batch: iter_time=1.103e-04, forward_time=0.109, loss_ctc=83.133, loss_att=61.505, acc=0.686, loss=67.994, backward_time=0.752, grad_norm=104.402, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.112, optim0_lr0=9.547e-05, train_time=1.997 +[gpua003:0/64] 2023-07-06 05:13:02,248 (trainer:732) INFO: 15epoch:train:7201-7300batch: iter_time=1.110e-04, forward_time=0.109, loss_ctc=81.010, loss_att=64.819, acc=0.692, loss=69.677, backward_time=0.751, grad_norm=110.286, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.112, optim0_lr0=9.543e-05, train_time=1.995 +[gpua003:0/64] 2023-07-06 05:14:42,319 (trainer:732) INFO: 15epoch:train:7301-7400batch: iter_time=1.107e-04, forward_time=0.109, loss_ctc=72.689, loss_att=54.067, acc=0.697, loss=59.654, backward_time=0.752, grad_norm=85.938, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.112, optim0_lr0=9.540e-05, train_time=2.001 +[gpua003:0/64] 2023-07-06 05:16:26,124 (multiple_iter_factory:32) INFO: Building 9th iter-factory... +[gpua003:0/64] 2023-07-06 05:16:45,225 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-06 05:16:48,776 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-06 05:16:48,777 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11, +[gpua003:0/64] 2023-07-06 05:16:48,783 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-06 05:21:21,795 (trainer:732) INFO: 15epoch:train:7401-7500batch: iter_time=1.282, forward_time=0.110, loss_ctc=75.686, loss_att=56.849, acc=0.702, loss=62.500, backward_time=0.778, grad_norm=88.301, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.112, optim0_lr0=9.536e-05, train_time=7.989 +[gpua003:0/64] 2023-07-06 05:23:04,740 (trainer:732) INFO: 15epoch:train:7501-7600batch: iter_time=1.225e-04, forward_time=0.109, loss_ctc=74.838, loss_att=55.781, acc=0.699, loss=61.498, backward_time=0.759, grad_norm=93.290, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.113, optim0_lr0=9.533e-05, train_time=2.059 +[gpua003:0/64] 2023-07-06 05:24:45,328 (trainer:732) INFO: 15epoch:train:7601-7700batch: iter_time=1.022e-04, forward_time=0.110, loss_ctc=83.499, loss_att=54.322, acc=0.699, loss=63.075, backward_time=0.754, grad_norm=90.328, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.113, optim0_lr0=9.529e-05, train_time=2.012 +[gpua003:0/64] 2023-07-06 05:26:25,161 (trainer:732) INFO: 
15epoch:train:7701-7800batch: iter_time=1.134e-04, forward_time=0.109, loss_ctc=71.384, loss_att=51.519, acc=0.703, loss=57.479, backward_time=0.751, grad_norm=86.506, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.113, optim0_lr0=9.526e-05, train_time=1.996 +[gpua003:0/64] 2023-07-06 05:28:04,981 (trainer:732) INFO: 15epoch:train:7801-7900batch: iter_time=1.131e-04, forward_time=0.109, loss_ctc=73.904, loss_att=64.610, acc=0.679, loss=67.398, backward_time=0.751, grad_norm=102.632, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.112, optim0_lr0=9.522e-05, train_time=1.996 +[gpua003:0/64] 2023-07-06 05:29:44,880 (trainer:732) INFO: 15epoch:train:7901-8000batch: iter_time=1.167e-04, forward_time=0.109, loss_ctc=83.085, loss_att=62.060, acc=0.679, loss=68.368, backward_time=0.753, grad_norm=103.977, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.113, optim0_lr0=9.519e-05, train_time=1.998 +[gpua003:0/64] 2023-07-06 05:31:24,806 (trainer:732) INFO: 15epoch:train:8001-8100batch: iter_time=1.085e-04, forward_time=0.109, loss_ctc=80.413, loss_att=61.210, acc=0.698, loss=66.971, backward_time=0.752, grad_norm=96.822, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.112, optim0_lr0=9.516e-05, train_time=1.998 +[gpua003:0/64] 2023-07-06 05:33:06,003 (trainer:732) INFO: 15epoch:train:8101-8200batch: iter_time=9.574e-05, forward_time=0.109, loss_ctc=75.570, loss_att=55.287, acc=0.694, loss=61.372, backward_time=0.753, grad_norm=88.815, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.113, optim0_lr0=9.512e-05, train_time=2.024 +[gpua003:0/64] 2023-07-06 05:34:48,433 (trainer:732) INFO: 15epoch:train:8201-8300batch: iter_time=1.065e-04, forward_time=0.110, loss_ctc=73.115, loss_att=57.381, acc=0.707, loss=62.101, backward_time=0.755, grad_norm=84.564, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.113, optim0_lr0=9.509e-05, train_time=2.048 +[gpua003:0/64] 2023-07-06 05:35:22,627 (multiple_iter_factory:32) INFO: Building 10th iter-factory... 
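# Annotation: optim0_lr0 decays smoothly through this stretch (9.689e-05 at
# the top of the section down to 9.509e-05 here), consistent with the
# inverse-sqrt phase of a Noam-style warmup schedule; the experiment name
# advertises lr 2.5e-4 with 10k warmup steps. The sketch below assumes
# espnet2's WarmupLR rule -- the scheduler is not echoed in this log, so treat
# the exact formula (and the step estimate) as an assumption.
def warmup_lr(step, base_lr=2.5e-4, warmup_steps=10_000):
    # ramps up to base_lr at step == warmup_steps, then decays ~ step**-0.5
    return base_lr * warmup_steps**0.5 * min(step**-0.5,
                                             step * warmup_steps**-1.5)

# inverting the decay branch: lr ~= 9.5e-5 corresponds to roughly
# (2.5e-4 / 9.5e-5)**2 * 10_000 ~= 6.9e4 optimizer steps
print(warmup_lr(69_000))   # ~9.52e-05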
+[gpua003:0/64] 2023-07-06 05:35:42,157 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-06 05:35:45,995 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-06 05:35:45,995 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8, +[gpua003:0/64] 2023-07-06 05:35:46,001 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-06 05:40:45,752 (trainer:732) INFO: 15epoch:train:8301-8400batch: iter_time=1.326, forward_time=0.109, loss_ctc=77.487, loss_att=56.217, acc=0.681, loss=62.598, backward_time=0.787, grad_norm=82.284, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.112, optim0_lr0=9.505e-05, train_time=7.146 +[gpua003:0/64] 2023-07-06 05:42:30,027 (trainer:732) INFO: 15epoch:train:8401-8500batch: iter_time=1.096e-04, forward_time=0.109, loss_ctc=81.840, loss_att=60.268, acc=0.694, loss=66.740, backward_time=0.763, grad_norm=98.195, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.112, optim0_lr0=9.502e-05, train_time=2.085 +[gpua003:0/64] 2023-07-06 05:44:15,543 (trainer:732) INFO: 15epoch:train:8501-8600batch: iter_time=1.047e-04, forward_time=0.108, loss_ctc=78.760, loss_att=51.979, acc=0.691, loss=60.013, backward_time=0.768, grad_norm=92.124, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.112, optim0_lr0=9.498e-05, train_time=2.110 +[gpua003:0/64] 2023-07-06 05:46:05,064 (trainer:732) INFO: 15epoch:train:8601-8700batch: iter_time=1.001e-04, forward_time=0.108, loss_ctc=70.996, loss_att=57.564, acc=0.676, loss=61.593, backward_time=0.777, grad_norm=91.103, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.112, optim0_lr0=9.495e-05, train_time=2.190 +[gpua003:0/64] 2023-07-06 05:47:51,510 (trainer:732) INFO: 15epoch:train:8701-8800batch: iter_time=1.110e-04, forward_time=0.108, loss_ctc=80.422, loss_att=66.412, acc=0.672, loss=70.615, backward_time=0.757, grad_norm=94.400, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.112, optim0_lr0=9.491e-05, train_time=2.129 +[gpua003:0/64] 2023-07-06 05:49:31,215 (trainer:732) INFO: 15epoch:train:8801-8900batch: iter_time=1.153e-04, forward_time=0.108, loss_ctc=78.907, loss_att=60.674, acc=0.693, loss=66.144, backward_time=0.750, grad_norm=89.498, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.112, optim0_lr0=9.488e-05, train_time=1.994 +[gpua003:0/64] 2023-07-06 05:51:11,993 (trainer:732) INFO: 15epoch:train:8901-9000batch: iter_time=1.134e-04, forward_time=0.108, loss_ctc=76.299, loss_att=56.694, acc=0.694, loss=62.575, backward_time=0.751, grad_norm=82.090, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.113, optim0_lr0=9.485e-05, train_time=2.015 +[gpua003:0/64] 2023-07-06 05:52:52,430 (trainer:732) INFO: 15epoch:train:9001-9100batch: iter_time=1.122e-04, forward_time=0.108, loss_ctc=74.360, loss_att=57.538, acc=0.689, loss=62.585, backward_time=0.750, grad_norm=87.996, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.112, optim0_lr0=9.481e-05, 
train_time=2.009 +[gpua003:0/64] 2023-07-06 05:54:01,666 (multiple_iter_factory:32) INFO: Building 11th iter-factory... +[gpua003:0/64] 2023-07-06 05:54:20,653 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-06 05:54:24,452 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-06 05:54:24,452 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5, +[gpua003:0/64] 2023-07-06 05:54:24,459 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-06 05:58:56,052 (trainer:732) INFO: 15epoch:train:9101-9200batch: iter_time=1.330, forward_time=0.135, loss_ctc=76.652, loss_att=55.319, acc=0.689, loss=61.719, backward_time=0.760, grad_norm=94.798, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.113, optim0_lr0=9.478e-05, train_time=7.272 +[gpua003:0/64] 2023-07-06 06:00:39,277 (trainer:732) INFO: 15epoch:train:9201-9300batch: iter_time=9.552e-05, forward_time=0.111, loss_ctc=77.373, loss_att=56.510, acc=0.706, loss=62.769, backward_time=0.757, grad_norm=98.925, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.112, optim0_lr0=9.474e-05, train_time=2.064 +[gpua003:0/64] 2023-07-06 06:02:26,540 (trainer:732) INFO: 15epoch:train:9301-9400batch: iter_time=1.017e-04, forward_time=0.109, loss_ctc=86.609, loss_att=55.256, acc=0.704, loss=64.662, backward_time=0.763, grad_norm=94.931, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.112, optim0_lr0=9.471e-05, train_time=2.145 +[gpua003:0/64] 2023-07-06 06:04:09,380 (trainer:732) INFO: 15epoch:train:9401-9500batch: iter_time=1.045e-04, forward_time=0.109, loss_ctc=69.832, loss_att=52.386, acc=0.693, loss=57.620, backward_time=0.753, grad_norm=89.310, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.112, optim0_lr0=9.468e-05, train_time=2.057 +[gpua003:0/64] 2023-07-06 06:05:51,252 (trainer:732) INFO: 15epoch:train:9501-9600batch: iter_time=1.037e-04, forward_time=0.109, loss_ctc=73.533, loss_att=62.841, acc=0.688, loss=66.049, backward_time=0.751, grad_norm=93.939, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.112, optim0_lr0=9.464e-05, train_time=2.037 +[gpua003:0/64] 2023-07-06 06:07:41,819 (trainer:732) INFO: 15epoch:train:9601-9700batch: iter_time=1.076e-04, forward_time=0.108, loss_ctc=83.150, loss_att=60.639, acc=0.687, loss=67.393, backward_time=0.774, grad_norm=94.514, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.112, optim0_lr0=9.461e-05, train_time=2.211 +[gpua003:0/64] 2023-07-06 06:09:23,340 (trainer:732) INFO: 15epoch:train:9701-9800batch: iter_time=1.286e-04, forward_time=0.108, loss_ctc=81.189, loss_att=65.286, acc=0.694, loss=70.057, backward_time=0.752, grad_norm=92.569, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.112, optim0_lr0=9.457e-05, train_time=2.030 +[gpua003:0/64] 2023-07-06 06:11:03,236 (trainer:732) INFO: 15epoch:train:9801-9900batch: iter_time=1.122e-04, forward_time=0.107, loss_ctc=72.362, loss_att=53.624, acc=0.694, loss=59.246, 
backward_time=0.751, grad_norm=87.692, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.111, optim0_lr0=9.454e-05, train_time=1.998 +[gpua003:0/64] 2023-07-06 06:12:42,895 (trainer:732) INFO: 15epoch:train:9901-10000batch: iter_time=1.061e-04, forward_time=0.107, loss_ctc=75.625, loss_att=55.477, acc=0.704, loss=61.522, backward_time=0.750, grad_norm=86.254, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.112, optim0_lr0=9.451e-05, train_time=1.993 +[gpua003:0/64] 2023-07-06 06:24:28,681 (trainer:338) INFO: 15epoch results: [train] iter_time=0.185, forward_time=0.110, loss_ctc=78.489, loss_att=58.649, acc=0.688, loss=64.601, backward_time=0.756, grad_norm=93.007, clip=100.000, loss_scale=7.318e+14, optim_step_time=0.112, optim0_lr0=9.624e-05, train_time=2.644, time=3 hours, 40 minutes and 32.98 seconds, total_count=120000, gpu_max_cached_mem_GB=37.770, [valid] loss_ctc=52.634, cer_ctc=0.298, loss_att=43.555, acc=0.657, cer=0.377, wer=0.991, loss=46.279, time=5 minutes and 28.62 seconds, total_count=12650, gpu_max_cached_mem_GB=37.770, [att_plot] time=6 minutes and 8.01 seconds, total_count=0, gpu_max_cached_mem_GB=37.770 +[gpua003:0/64] 2023-07-06 06:24:45,984 (trainer:386) INFO: The best model has been updated: valid.acc, valid.total_count +[gpua003:0/64] 2023-07-06 06:24:45,990 (trainer:440) INFO: The model files were removed: exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/9epoch.pth, exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/10epoch.pth +[gpua003:0/64] 2023-07-06 06:24:46,016 (trainer:272) INFO: 16/100epoch started. Estimated time to finish: 1 week, 6 days and 16 hours +[gpua003:0/64] 2023-07-06 06:24:46,836 (multiple_iter_factory:32) INFO: Building 0th iter-factory... 
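# Annotation: end-of-epoch housekeeping. The [train] block averages the
# per-100-batch figures over the whole epoch, [valid] adds CER/WER on
# dump/raw/dev, and "The best model has been updated: valid.acc, ..." triggers
# pruning of checkpoints no longer among the best monitored epochs -- hence
# 9epoch.pth and 10epoch.pth being deleted above. The keep_nbest_models
# setting is not shown in this log, and every accuracy below except epoch
# 15's logged 0.657 is made up, so this is a sketch of such retention, not
# ESPnet's actual implementation.
def prune_checkpoints(valid_acc, keep_nbest=5):
    """valid_acc: {epoch: acc}; returns epochs whose .pth can be removed."""
    best = sorted(valid_acc, key=valid_acc.get, reverse=True)[:keep_nbest]
    return sorted(e for e in valid_acc if e not in best)

hypothetical = {9: 0.641, 10: 0.644, 11: 0.647, 12: 0.650,
                13: 0.652, 14: 0.655, 15: 0.657}  # only 0.657 is from the log
print(prune_checkpoints(hypothetical))            # -> [9, 10], as logged above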
+[gpua003:0/64] 2023-07-06 06:25:05,794 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-06 06:25:10,550 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-06 06:25:10,550 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7, +[gpua003:0/64] 2023-07-06 06:25:10,617 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-06 06:31:07,295 (trainer:732) INFO: 16epoch:train:1-100batch: iter_time=2.752, forward_time=0.134, loss_ctc=75.117, loss_att=48.716, acc=0.689, loss=56.636, backward_time=0.771, grad_norm=101.201, clip=100.000, loss_scale=2.252e+15, optim_step_time=0.116, optim0_lr0=9.447e-05, train_time=7.615 +[gpua003:0/64] 2023-07-06 06:32:48,088 (trainer:732) INFO: 16epoch:train:101-200batch: iter_time=1.138e-04, forward_time=0.108, loss_ctc=74.028, loss_att=52.520, acc=0.697, loss=58.972, backward_time=0.754, grad_norm=94.459, clip=100.000, loss_scale=2.252e+15, optim_step_time=0.113, optim0_lr0=9.444e-05, train_time=2.019 +[gpua003:0/64] 2023-07-06 06:34:29,724 (trainer:732) INFO: 16epoch:train:201-300batch: iter_time=1.056e-04, forward_time=0.109, loss_ctc=64.990, loss_att=51.033, acc=0.694, loss=55.220, backward_time=0.752, grad_norm=82.414, clip=100.000, loss_scale=2.252e+15, optim_step_time=0.113, optim0_lr0=9.441e-05, train_time=2.033 +[gpua003:0/64] 2023-07-06 06:36:17,878 (trainer:732) INFO: 16epoch:train:301-400batch: iter_time=1.070e-04, forward_time=0.108, loss_ctc=76.120, loss_att=59.374, acc=0.683, loss=64.398, backward_time=0.760, grad_norm=93.942, clip=100.000, loss_scale=2.252e+15, optim_step_time=0.113, optim0_lr0=9.437e-05, train_time=2.163 +[gpua003:0/64] 2023-07-06 06:38:00,826 (trainer:732) INFO: 16epoch:train:401-500batch: iter_time=1.101e-04, forward_time=0.107, loss_ctc=67.187, loss_att=49.787, acc=0.695, loss=55.007, backward_time=0.754, grad_norm=82.026, clip=100.000, loss_scale=2.252e+15, optim_step_time=0.113, optim0_lr0=9.434e-05, train_time=2.059 +[gpua003:0/64] 2023-07-06 06:39:53,617 (trainer:732) INFO: 16epoch:train:501-600batch: iter_time=1.038e-04, forward_time=0.107, loss_ctc=81.351, loss_att=59.096, acc=0.689, loss=65.772, backward_time=0.772, grad_norm=90.956, clip=100.000, loss_scale=2.252e+15, optim_step_time=0.113, optim0_lr0=9.431e-05, train_time=2.256 +[gpua003:0/64] 2023-07-06 06:41:43,234 (trainer:732) INFO: 16epoch:train:601-700batch: iter_time=9.752e-05, forward_time=0.108, loss_ctc=87.613, loss_att=64.737, acc=0.688, loss=71.600, backward_time=0.776, grad_norm=130.042, clip=100.000, loss_scale=2.252e+15, optim_step_time=0.113, optim0_lr0=9.427e-05, train_time=2.192 +[gpua003:0/64] 2023-07-06 06:43:33,517 (trainer:732) INFO: 16epoch:train:701-800batch: iter_time=1.003e-04, forward_time=0.109, loss_ctc=72.526, loss_att=59.461, acc=0.682, loss=63.381, backward_time=0.768, grad_norm=92.190, clip=100.000, loss_scale=2.252e+15, optim_step_time=0.113, optim0_lr0=9.424e-05, train_time=2.205 
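# Annotation: each trainer:732 line reports loss_ctc, loss_att and a combined
# loss, and the combined value reproduces 0.3*loss_ctc + 0.7*loss_att to the
# third decimal throughout this log, i.e. a hybrid CTC/attention objective
# with ctc_weight = 0.3. The weight is inferred from the logged numbers, not
# read from the (unprinted) training yaml, so treat 0.3 as an
# observation-backed assumption rather than the config's stated value.
CTC_WEIGHT = 0.3

def combined(loss_ctc, loss_att, w=CTC_WEIGHT):
    return w * loss_ctc + (1.0 - w) * loss_att

# (loss_ctc, loss_att, logged loss) taken verbatim from the
# 16epoch:train:1-300batch entries just above
for ctc, att, logged in [(75.117, 48.716, 56.636),
                         (74.028, 52.520, 58.972),
                         (64.990, 51.033, 55.220)]:
    assert abs(combined(ctc, att) - logged) < 5e-3, (ctc, att, logged)
print("loss == 0.3*loss_ctc + 0.7*loss_att on all checked lines")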
+[gpua003:0/64] 2023-07-06 06:44:14,263 (multiple_iter_factory:32) INFO: Building 1th iter-factory... +[gpua003:0/64] 2023-07-06 06:44:33,081 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-06 06:44:36,840 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-06 06:44:36,840 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11, +[gpua003:0/64] 2023-07-06 06:44:36,846 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-06 06:48:21,733 (trainer:732) INFO: 16epoch:train:801-900batch: iter_time=1.321, forward_time=0.107, loss_ctc=80.667, loss_att=58.981, acc=0.688, loss=65.487, backward_time=0.768, grad_norm=96.967, clip=100.000, loss_scale=2.252e+15, optim_step_time=0.113, optim0_lr0=9.420e-05, train_time=5.764 +[gpua003:0/64] 2023-07-06 06:50:06,977 (trainer:732) INFO: 16epoch:train:901-1000batch: iter_time=9.995e-05, forward_time=0.107, loss_ctc=71.004, loss_att=50.281, acc=0.699, loss=56.498, backward_time=0.758, grad_norm=96.342, clip=100.000, loss_scale=2.252e+15, optim_step_time=0.113, optim0_lr0=9.417e-05, train_time=2.105 +[gpua003:0/64] 2023-07-06 06:51:47,247 (trainer:732) INFO: 16epoch:train:1001-1100batch: iter_time=1.007e-04, forward_time=0.107, loss_ctc=62.698, loss_att=47.891, acc=0.697, loss=52.333, backward_time=0.752, grad_norm=74.518, clip=100.000, loss_scale=2.252e+15, optim_step_time=0.113, optim0_lr0=9.414e-05, train_time=2.005 +[gpua003:0/64] 2023-07-06 06:53:27,011 (trainer:732) INFO: 16epoch:train:1101-1200batch: iter_time=1.048e-04, forward_time=0.108, loss_ctc=75.410, loss_att=57.557, acc=0.687, loss=62.913, backward_time=0.751, grad_norm=83.906, clip=100.000, loss_scale=2.252e+15, optim_step_time=0.113, optim0_lr0=9.410e-05, train_time=1.995 +[gpua003:0/64] 2023-07-06 06:55:06,788 (trainer:732) INFO: 16epoch:train:1201-1300batch: iter_time=1.035e-04, forward_time=0.107, loss_ctc=64.005, loss_att=47.055, acc=0.700, loss=52.140, backward_time=0.751, grad_norm=79.784, clip=100.000, loss_scale=2.252e+15, optim_step_time=0.113, optim0_lr0=9.407e-05, train_time=1.995 +[gpua003:0/64] 2023-07-06 06:56:46,657 (trainer:732) INFO: 16epoch:train:1301-1400batch: iter_time=9.661e-05, forward_time=0.107, loss_ctc=81.759, loss_att=59.701, acc=0.678, loss=66.319, backward_time=0.751, grad_norm=92.222, clip=100.000, loss_scale=2.252e+15, optim_step_time=0.113, optim0_lr0=9.404e-05, train_time=1.997 +[gpua003:0/64] 2023-07-06 06:58:26,430 (trainer:732) INFO: 16epoch:train:1401-1500batch: iter_time=9.571e-05, forward_time=0.107, loss_ctc=82.212, loss_att=60.520, acc=0.699, loss=67.027, backward_time=0.752, grad_norm=85.514, clip=100.000, loss_scale=2.252e+15, optim_step_time=0.113, optim0_lr0=9.400e-05, train_time=1.995 +[gpua003:0/64] 2023-07-06 07:00:06,377 (trainer:732) INFO: 16epoch:train:1501-1600batch: iter_time=9.767e-05, forward_time=0.108, loss_ctc=70.485, loss_att=56.946, acc=0.687, loss=61.008, 
backward_time=0.752, grad_norm=93.511, clip=100.000, loss_scale=2.252e+15, optim_step_time=0.113, optim0_lr0=9.397e-05, train_time=1.999 +[gpua003:0/64] 2023-07-06 07:01:14,601 (multiple_iter_factory:32) INFO: Building 2th iter-factory... +[gpua003:0/64] 2023-07-06 07:01:33,950 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-06 07:01:37,876 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-06 07:01:37,876 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6, +[gpua003:0/64] 2023-07-06 07:01:37,882 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-06 07:05:32,440 (trainer:732) INFO: 16epoch:train:1601-1700batch: iter_time=1.290, forward_time=0.108, loss_ctc=76.855, loss_att=60.487, acc=0.696, loss=65.397, backward_time=0.761, grad_norm=104.000, clip=100.000, loss_scale=2.252e+15, optim_step_time=0.113, optim0_lr0=9.394e-05, train_time=6.521 +[gpua003:0/64] 2023-07-06 07:07:12,660 (trainer:732) INFO: 16epoch:train:1701-1800batch: iter_time=9.317e-05, forward_time=0.107, loss_ctc=68.609, loss_att=46.164, acc=0.700, loss=52.897, backward_time=0.753, grad_norm=92.800, clip=100.000, loss_scale=2.252e+15, optim_step_time=0.113, optim0_lr0=9.391e-05, train_time=2.004 +[gpua003:0/64] 2023-07-06 07:08:52,753 (trainer:732) INFO: 16epoch:train:1801-1900batch: iter_time=8.951e-05, forward_time=0.107, loss_ctc=67.542, loss_att=52.449, acc=0.694, loss=56.977, backward_time=0.752, grad_norm=85.811, clip=100.000, loss_scale=2.252e+15, optim_step_time=0.112, optim0_lr0=9.387e-05, train_time=2.002 +[gpua003:0/64] 2023-07-06 07:10:32,527 (trainer:732) INFO: 16epoch:train:1901-2000batch: iter_time=8.930e-05, forward_time=0.107, loss_ctc=69.790, loss_att=55.626, acc=0.680, loss=59.875, backward_time=0.751, grad_norm=78.275, clip=100.000, loss_scale=2.252e+15, optim_step_time=0.112, optim0_lr0=9.384e-05, train_time=1.995 +[gpua003:0/64] 2023-07-06 07:12:12,369 (trainer:732) INFO: 16epoch:train:2001-2100batch: iter_time=1.018e-04, forward_time=0.108, loss_ctc=69.942, loss_att=52.418, acc=0.688, loss=57.675, backward_time=0.752, grad_norm=83.432, clip=100.000, loss_scale=2.252e+15, optim_step_time=0.113, optim0_lr0=9.381e-05, train_time=1.997 +[gpua003:0/64] 2023-07-06 07:13:52,056 (trainer:732) INFO: 16epoch:train:2101-2200batch: iter_time=1.072e-04, forward_time=0.108, loss_ctc=66.694, loss_att=50.173, acc=0.698, loss=55.129, backward_time=0.752, grad_norm=74.273, clip=100.000, loss_scale=2.252e+15, optim_step_time=0.113, optim0_lr0=9.377e-05, train_time=1.994 +[gpua003:0/64] 2023-07-06 07:15:31,827 (trainer:732) INFO: 16epoch:train:2201-2300batch: iter_time=1.030e-04, forward_time=0.109, loss_ctc=86.699, loss_att=65.479, acc=0.678, loss=71.845, backward_time=0.753, grad_norm=112.678, clip=100.000, loss_scale=2.252e+15, optim_step_time=0.113, optim0_lr0=9.374e-05, train_time=1.995 +[gpua003:0/64] 2023-07-06 07:17:11,502 (trainer:732) INFO: 
16epoch:train:2301-2400batch: iter_time=1.009e-04, forward_time=0.109, loss_ctc=78.962, loss_att=61.486, acc=0.676, loss=66.729, backward_time=0.752, grad_norm=92.485, clip=100.000, loss_scale=2.252e+15, optim_step_time=0.113, optim0_lr0=9.371e-05, train_time=1.993 +[gpua003:0/64] 2023-07-06 07:18:51,509 (trainer:732) INFO: 16epoch:train:2401-2500batch: iter_time=1.015e-04, forward_time=0.110, loss_ctc=75.890, loss_att=62.591, acc=0.690, loss=66.581, backward_time=0.754, grad_norm=93.157, clip=100.000, loss_scale=2.252e+15, optim_step_time=0.113, optim0_lr0=9.367e-05, train_time=2.000 +[gpua003:0/64] 2023-07-06 07:18:52,779 (multiple_iter_factory:32) INFO: Building 3th iter-factory... +[gpua003:0/64] 2023-07-06 07:19:12,128 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-06 07:19:15,913 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-06 07:19:15,913 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8, +[gpua003:0/64] 2023-07-06 07:19:15,919 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-06 07:24:48,132 (trainer:732) INFO: 16epoch:train:2501-2600batch: iter_time=1.279, forward_time=0.108, loss_ctc=71.726, loss_att=46.606, acc=0.697, loss=54.142, backward_time=0.767, grad_norm=105.412, clip=100.000, loss_scale=2.252e+15, optim_step_time=0.113, optim0_lr0=9.364e-05, train_time=7.132 +[gpua003:0/64] 2023-07-06 07:26:27,825 (trainer:732) INFO: 16epoch:train:2601-2700batch: iter_time=1.075e-04, forward_time=0.108, loss_ctc=66.631, loss_att=51.594, acc=0.698, loss=56.105, backward_time=0.752, grad_norm=90.533, clip=100.000, loss_scale=2.252e+15, optim_step_time=0.113, optim0_lr0=9.361e-05, train_time=1.994 +[gpua003:0/64] 2023-07-06 07:28:07,530 (trainer:732) INFO: 16epoch:train:2701-2800batch: iter_time=9.716e-05, forward_time=0.108, loss_ctc=70.966, loss_att=55.570, acc=0.680, loss=60.189, backward_time=0.752, grad_norm=86.466, clip=100.000, loss_scale=2.252e+15, optim_step_time=0.113, optim0_lr0=9.358e-05, train_time=1.994 +[gpua003:0/64] 2023-07-06 07:29:47,312 (trainer:732) INFO: 16epoch:train:2801-2900batch: iter_time=1.072e-04, forward_time=0.108, loss_ctc=71.097, loss_att=53.450, acc=0.675, loss=58.744, backward_time=0.753, grad_norm=93.208, clip=100.000, loss_scale=2.252e+15, optim_step_time=0.113, optim0_lr0=9.354e-05, train_time=1.995 +[gpua003:0/64] 2023-07-06 07:31:27,133 (trainer:732) INFO: 16epoch:train:2901-3000batch: iter_time=8.955e-05, forward_time=0.108, loss_ctc=63.821, loss_att=46.903, acc=0.701, loss=51.979, backward_time=0.752, grad_norm=74.418, clip=100.000, loss_scale=2.252e+15, optim_step_time=0.113, optim0_lr0=9.351e-05, train_time=1.996 +[gpua003:0/64] 2023-07-06 07:33:06,795 (trainer:732) INFO: 16epoch:train:3001-3100batch: iter_time=9.687e-05, forward_time=0.108, loss_ctc=82.866, loss_att=60.932, acc=0.685, loss=67.512, backward_time=0.752, grad_norm=97.470, clip=100.000, loss_scale=2.252e+15, 
optim_step_time=0.113, optim0_lr0=9.348e-05, train_time=1.993 +[gpua003:0/64] 2023-07-06 07:34:46,471 (trainer:732) INFO: 16epoch:train:3101-3200batch: iter_time=1.035e-04, forward_time=0.108, loss_ctc=82.047, loss_att=62.386, acc=0.681, loss=68.285, backward_time=0.752, grad_norm=94.406, clip=100.000, loss_scale=2.252e+15, optim_step_time=0.113, optim0_lr0=9.344e-05, train_time=1.993 +[gpua003:0/64] 2023-07-06 07:36:26,118 (trainer:732) INFO: 16epoch:train:3201-3300batch: iter_time=9.042e-05, forward_time=0.108, loss_ctc=77.669, loss_att=65.484, acc=0.671, loss=69.140, backward_time=0.752, grad_norm=91.421, clip=100.000, loss_scale=2.252e+15, optim_step_time=0.113, optim0_lr0=9.341e-05, train_time=1.993 +[gpua003:0/64] 2023-07-06 07:37:01,224 (multiple_iter_factory:32) INFO: Building 4th iter-factory... +[gpua003:0/64] 2023-07-06 07:37:20,629 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-06 07:37:24,141 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-06 07:37:24,141 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4, +[gpua003:0/64] 2023-07-06 07:37:24,147 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-06 07:41:53,320 (trainer:732) INFO: 16epoch:train:3301-3400batch: iter_time=2.180, forward_time=0.108, loss_ctc=78.290, loss_att=55.680, acc=0.688, loss=62.463, backward_time=0.766, grad_norm=91.637, clip=100.000, loss_scale=2.252e+15, optim_step_time=0.113, optim0_lr0=9.338e-05, train_time=6.544 +[gpua003:0/64] 2023-07-06 07:43:33,605 (trainer:732) INFO: 16epoch:train:3401-3500batch: iter_time=1.037e-04, forward_time=0.107, loss_ctc=68.344, loss_att=49.906, acc=0.712, loss=55.438, backward_time=0.754, grad_norm=81.948, clip=100.000, loss_scale=2.252e+15, optim_step_time=0.112, optim0_lr0=9.335e-05, train_time=2.005 +[gpua003:0/64] 2023-07-06 07:45:13,347 (trainer:732) INFO: 16epoch:train:3501-3600batch: iter_time=9.421e-05, forward_time=0.108, loss_ctc=65.417, loss_att=50.435, acc=0.688, loss=54.930, backward_time=0.752, grad_norm=90.718, clip=100.000, loss_scale=2.252e+15, optim_step_time=0.113, optim0_lr0=9.331e-05, train_time=1.995 +[gpua003:0/64] 2023-07-06 07:46:53,165 (trainer:732) INFO: 16epoch:train:3601-3700batch: iter_time=1.013e-04, forward_time=0.109, loss_ctc=70.603, loss_att=51.998, acc=0.686, loss=57.579, backward_time=0.751, grad_norm=85.036, clip=100.000, loss_scale=2.252e+15, optim_step_time=0.113, optim0_lr0=9.328e-05, train_time=1.996 +[gpua003:0/64] 2023-07-06 07:48:33,178 (trainer:732) INFO: 16epoch:train:3701-3800batch: iter_time=1.098e-04, forward_time=0.109, loss_ctc=65.655, loss_att=51.080, acc=0.697, loss=55.453, backward_time=0.753, grad_norm=88.931, clip=100.000, loss_scale=2.252e+15, optim_step_time=0.113, optim0_lr0=9.325e-05, train_time=2.000 +[gpua003:0/64] 2023-07-06 07:50:12,906 (trainer:732) INFO: 16epoch:train:3801-3900batch: iter_time=9.838e-05, forward_time=0.108, loss_ctc=74.297, 
loss_att=55.407, acc=0.684, loss=61.074, backward_time=0.751, grad_norm=82.975, clip=100.000, loss_scale=2.252e+15, optim_step_time=0.113, optim0_lr0=9.322e-05, train_time=1.994 +[gpua003:0/64] 2023-07-06 07:51:56,731 (trainer:732) INFO: 16epoch:train:3901-4000batch: iter_time=9.921e-05, forward_time=0.109, loss_ctc=84.998, loss_att=61.759, acc=0.691, loss=68.731, backward_time=0.760, grad_norm=94.665, clip=100.000, loss_scale=2.252e+15, optim_step_time=0.113, optim0_lr0=9.318e-05, train_time=2.076 +[gpua003:0/64] 2023-07-06 07:53:42,883 (trainer:732) INFO: 16epoch:train:4001-4100batch: iter_time=1.143e-04, forward_time=0.109, loss_ctc=75.067, loss_att=62.875, acc=0.670, loss=66.533, backward_time=0.758, grad_norm=91.761, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.113, optim0_lr0=9.315e-05, train_time=2.123 +[gpua003:0/64] 2023-07-06 07:54:50,953 (multiple_iter_factory:32) INFO: Building 5th iter-factory... +[gpua003:0/64] 2023-07-06 07:55:10,233 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-06 07:55:13,721 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-06 07:55:13,721 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2, +[gpua003:0/64] 2023-07-06 07:55:13,728 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-06 07:58:55,908 (trainer:732) INFO: 16epoch:train:4101-4200batch: iter_time=2.076, forward_time=0.141, loss_ctc=74.642, loss_att=55.845, acc=0.693, loss=61.484, backward_time=0.761, grad_norm=97.003, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.114, optim0_lr0=9.312e-05, train_time=6.260 +[gpua003:0/64] 2023-07-06 08:00:37,112 (trainer:732) INFO: 16epoch:train:4201-4300batch: iter_time=1.039e-04, forward_time=0.107, loss_ctc=67.813, loss_att=44.642, acc=0.709, loss=51.593, backward_time=0.754, grad_norm=88.191, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.111, optim0_lr0=9.309e-05, train_time=2.024 +[gpua003:0/64] 2023-07-06 08:02:16,904 (trainer:732) INFO: 16epoch:train:4301-4400batch: iter_time=1.034e-04, forward_time=0.108, loss_ctc=68.215, loss_att=52.130, acc=0.696, loss=56.955, backward_time=0.752, grad_norm=94.395, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.112, optim0_lr0=9.306e-05, train_time=1.996 +[gpua003:0/64] 2023-07-06 08:03:56,527 (trainer:732) INFO: 16epoch:train:4401-4500batch: iter_time=9.344e-05, forward_time=0.106, loss_ctc=69.906, loss_att=54.891, acc=0.683, loss=59.395, backward_time=0.751, grad_norm=83.039, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.112, optim0_lr0=9.302e-05, train_time=1.992 +[gpua003:0/64] 2023-07-06 08:05:37,831 (trainer:732) INFO: 16epoch:train:4501-4600batch: iter_time=1.004e-04, forward_time=0.108, loss_ctc=68.748, loss_att=51.394, acc=0.689, loss=56.600, backward_time=0.755, grad_norm=93.992, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.113, optim0_lr0=9.299e-05, train_time=2.026 +[gpua003:0/64] 2023-07-06 
08:07:17,508 (trainer:732) INFO: 16epoch:train:4601-4700batch: iter_time=1.059e-04, forward_time=0.107, loss_ctc=65.443, loss_att=48.597, acc=0.702, loss=53.651, backward_time=0.752, grad_norm=111.503, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.113, optim0_lr0=9.296e-05, train_time=1.993 +[gpua003:0/64] 2023-07-06 08:08:57,106 (trainer:732) INFO: 16epoch:train:4701-4800batch: iter_time=1.018e-04, forward_time=0.107, loss_ctc=84.020, loss_att=64.596, acc=0.679, loss=70.423, backward_time=0.751, grad_norm=127.671, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.113, optim0_lr0=9.293e-05, train_time=1.992 +[gpua003:0/64] 2023-07-06 08:10:36,976 (trainer:732) INFO: 16epoch:train:4801-4900batch: iter_time=1.060e-04, forward_time=0.108, loss_ctc=76.921, loss_att=58.835, acc=0.684, loss=64.261, backward_time=0.752, grad_norm=114.068, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.112, optim0_lr0=9.289e-05, train_time=1.997 +[gpua003:0/64] 2023-07-06 08:12:16,723 (trainer:732) INFO: 16epoch:train:4901-5000batch: iter_time=1.049e-04, forward_time=0.109, loss_ctc=74.951, loss_att=61.324, acc=0.694, loss=65.412, backward_time=0.751, grad_norm=92.245, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.112, optim0_lr0=9.286e-05, train_time=1.995 +[gpua003:0/64] 2023-07-06 08:12:18,610 (multiple_iter_factory:32) INFO: Building 6th iter-factory... +[gpua003:0/64] 2023-07-06 08:12:37,768 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-06 08:12:41,328 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-06 08:12:41,328 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3, +[gpua003:0/64] 2023-07-06 08:12:41,335 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-06 08:18:49,163 (trainer:732) INFO: 16epoch:train:5001-5100batch: iter_time=1.303, forward_time=0.116, loss_ctc=72.397, loss_att=47.284, acc=0.706, loss=54.818, backward_time=0.768, grad_norm=87.352, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.113, optim0_lr0=9.283e-05, train_time=7.848 +[gpua003:0/64] 2023-07-06 08:20:28,927 (trainer:732) INFO: 16epoch:train:5101-5200batch: iter_time=1.050e-04, forward_time=0.109, loss_ctc=69.999, loss_att=51.235, acc=0.705, loss=56.864, backward_time=0.750, grad_norm=90.609, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.112, optim0_lr0=9.280e-05, train_time=1.996 +[gpua003:0/64] 2023-07-06 08:22:09,028 (trainer:732) INFO: 16epoch:train:5201-5300batch: iter_time=8.618e-05, forward_time=0.109, loss_ctc=62.913, loss_att=49.293, acc=0.704, loss=53.379, backward_time=0.753, grad_norm=77.097, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.112, optim0_lr0=9.277e-05, train_time=2.002 +[gpua003:0/64] 2023-07-06 08:23:48,933 (trainer:732) INFO: 16epoch:train:5301-5400batch: iter_time=8.628e-05, forward_time=0.109, loss_ctc=72.795, loss_att=55.439, acc=0.694, loss=60.646, backward_time=0.752, grad_norm=88.288, 
clip=100.000, loss_scale=4.504e+15, optim_step_time=0.113, optim0_lr0=9.273e-05, train_time=1.998 +[gpua003:0/64] 2023-07-06 08:25:28,752 (trainer:732) INFO: 16epoch:train:5401-5500batch: iter_time=9.338e-05, forward_time=0.109, loss_ctc=64.718, loss_att=47.439, acc=0.702, loss=52.623, backward_time=0.752, grad_norm=82.629, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.113, optim0_lr0=9.270e-05, train_time=1.996 +[gpua003:0/64] 2023-07-06 08:27:08,812 (trainer:732) INFO: 16epoch:train:5501-5600batch: iter_time=8.449e-05, forward_time=0.108, loss_ctc=78.843, loss_att=57.892, acc=0.696, loss=64.177, backward_time=0.754, grad_norm=92.290, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.113, optim0_lr0=9.267e-05, train_time=2.001 +[gpua003:0/64] 2023-07-06 08:28:55,560 (trainer:732) INFO: 16epoch:train:5601-5700batch: iter_time=8.973e-05, forward_time=0.107, loss_ctc=83.966, loss_att=62.853, acc=0.700, loss=69.187, backward_time=0.778, grad_norm=96.335, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.113, optim0_lr0=9.264e-05, train_time=2.135 +[gpua003:0/64] 2023-07-06 08:30:43,294 (trainer:732) INFO: 16epoch:train:5701-5800batch: iter_time=8.613e-05, forward_time=0.108, loss_ctc=69.962, loss_att=57.568, acc=0.690, loss=61.286, backward_time=0.762, grad_norm=89.847, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.113, optim0_lr0=9.261e-05, train_time=2.154 +[gpua003:0/64] 2023-07-06 08:31:17,439 (multiple_iter_factory:32) INFO: Building 7th iter-factory... +[gpua003:0/64] 2023-07-06 08:31:36,652 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-06 08:31:40,135 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-06 08:31:40,136 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5, +[gpua003:0/64] 2023-07-06 08:31:40,142 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-06 08:35:45,247 (trainer:732) INFO: 16epoch:train:5801-5900batch: iter_time=1.276, forward_time=0.107, loss_ctc=80.360, loss_att=57.972, acc=0.693, loss=64.689, backward_time=0.765, grad_norm=106.653, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.113, optim0_lr0=9.258e-05, train_time=6.039 +[gpua003:0/64] 2023-07-06 08:37:25,600 (trainer:732) INFO: 16epoch:train:5901-6000batch: iter_time=9.151e-05, forward_time=0.107, loss_ctc=69.337, loss_att=47.625, acc=0.723, loss=54.139, backward_time=0.753, grad_norm=82.017, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.112, optim0_lr0=9.254e-05, train_time=2.007 +[gpua003:0/64] 2023-07-06 08:39:05,439 (trainer:732) INFO: 16epoch:train:6001-6100batch: iter_time=9.170e-05, forward_time=0.107, loss_ctc=65.149, loss_att=50.375, acc=0.696, loss=54.807, backward_time=0.752, grad_norm=92.310, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.112, optim0_lr0=9.251e-05, train_time=1.997 +[gpua003:0/64] 2023-07-06 08:40:45,317 (trainer:732) INFO: 16epoch:train:6101-6200batch: iter_time=1.040e-04, 
forward_time=0.107, loss_ctc=70.697, loss_att=51.205, acc=0.700, loss=57.052, backward_time=0.753, grad_norm=86.258, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.112, optim0_lr0=9.248e-05, train_time=1.997 +[gpua003:0/64] 2023-07-06 08:42:24,960 (trainer:732) INFO: 16epoch:train:6201-6300batch: iter_time=9.827e-05, forward_time=0.108, loss_ctc=64.757, loss_att=50.584, acc=0.702, loss=54.836, backward_time=0.751, grad_norm=79.471, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.112, optim0_lr0=9.245e-05, train_time=1.993 +[gpua003:0/64] 2023-07-06 08:44:04,631 (trainer:732) INFO: 16epoch:train:6301-6400batch: iter_time=1.131e-04, forward_time=0.109, loss_ctc=72.490, loss_att=54.788, acc=0.696, loss=60.098, backward_time=0.751, grad_norm=92.689, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.113, optim0_lr0=9.242e-05, train_time=1.993 +[gpua003:0/64] 2023-07-06 08:45:44,133 (trainer:732) INFO: 16epoch:train:6401-6500batch: iter_time=1.081e-04, forward_time=0.108, loss_ctc=85.155, loss_att=58.768, acc=0.706, loss=66.684, backward_time=0.749, grad_norm=98.779, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.112, optim0_lr0=9.239e-05, train_time=1.990 +[gpua003:0/64] 2023-07-06 08:47:23,850 (trainer:732) INFO: 16epoch:train:6501-6600batch: iter_time=1.083e-04, forward_time=0.109, loss_ctc=73.711, loss_att=60.604, acc=0.688, loss=64.536, backward_time=0.751, grad_norm=84.467, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.112, optim0_lr0=9.235e-05, train_time=1.994 +[gpua003:0/64] 2023-07-06 08:48:30,857 (multiple_iter_factory:32) INFO: Building 8th iter-factory... +[gpua003:0/64] 2023-07-06 08:48:50,019 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-06 08:48:53,511 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-06 08:48:53,512 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10, +[gpua003:0/64] 2023-07-06 08:48:53,518 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-06 08:53:20,677 (trainer:732) INFO: 16epoch:train:6601-6700batch: iter_time=1.289, forward_time=0.108, loss_ctc=74.070, loss_att=54.401, acc=0.697, loss=60.302, backward_time=0.761, grad_norm=86.767, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.112, optim0_lr0=9.232e-05, train_time=7.136 +[gpua003:0/64] 2023-07-06 08:55:01,310 (trainer:732) INFO: 16epoch:train:6701-6800batch: iter_time=1.138e-04, forward_time=0.107, loss_ctc=71.082, loss_att=51.771, acc=0.709, loss=57.564, backward_time=0.755, grad_norm=90.309, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.112, optim0_lr0=9.229e-05, train_time=2.012 +[gpua003:0/64] 2023-07-06 08:56:41,532 (trainer:732) INFO: 16epoch:train:6801-6900batch: iter_time=1.140e-04, forward_time=0.108, loss_ctc=65.828, loss_att=50.568, acc=0.693, loss=55.146, backward_time=0.752, grad_norm=92.849, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.112, optim0_lr0=9.226e-05, 
train_time=2.004 +[gpua003:0/64] 2023-07-06 08:58:21,807 (trainer:732) INFO: 16epoch:train:6901-7000batch: iter_time=1.105e-04, forward_time=0.107, loss_ctc=69.044, loss_att=51.957, acc=0.681, loss=57.083, backward_time=0.751, grad_norm=85.863, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.112, optim0_lr0=9.223e-05, train_time=2.005 +[gpua003:0/64] 2023-07-06 09:00:01,383 (trainer:732) INFO: 16epoch:train:7001-7100batch: iter_time=1.136e-04, forward_time=0.108, loss_ctc=66.173, loss_att=51.594, acc=0.692, loss=55.968, backward_time=0.751, grad_norm=107.161, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.112, optim0_lr0=9.220e-05, train_time=1.991 +[gpua003:0/64] 2023-07-06 09:01:41,306 (trainer:732) INFO: 16epoch:train:7101-7200batch: iter_time=1.196e-04, forward_time=0.108, loss_ctc=68.998, loss_att=50.099, acc=0.697, loss=55.769, backward_time=0.751, grad_norm=84.671, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.112, optim0_lr0=9.217e-05, train_time=1.998 +[gpua003:0/64] 2023-07-06 09:03:21,067 (trainer:732) INFO: 16epoch:train:7201-7300batch: iter_time=1.122e-04, forward_time=0.108, loss_ctc=83.622, loss_att=62.831, acc=0.690, loss=69.069, backward_time=0.751, grad_norm=94.437, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.112, optim0_lr0=9.213e-05, train_time=1.995 +[gpua003:0/64] 2023-07-06 09:05:00,740 (trainer:732) INFO: 16epoch:train:7301-7400batch: iter_time=1.076e-04, forward_time=0.108, loss_ctc=76.063, loss_att=61.450, acc=0.677, loss=65.834, backward_time=0.751, grad_norm=89.194, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.112, optim0_lr0=9.210e-05, train_time=1.993 +[gpua003:0/64] 2023-07-06 09:06:40,479 (trainer:732) INFO: 16epoch:train:7401-7500batch: iter_time=1.026e-04, forward_time=0.108, loss_ctc=74.348, loss_att=63.331, acc=0.691, loss=66.636, backward_time=0.752, grad_norm=94.746, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.112, optim0_lr0=9.207e-05, train_time=1.995 +[gpua003:0/64] 2023-07-06 09:06:50,888 (multiple_iter_factory:32) INFO: Building 9th iter-factory... 
+[gpua003:0/64] 2023-07-06 09:07:09,872 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-06 09:07:13,343 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-06 09:07:13,343 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1, +[gpua003:0/64] 2023-07-06 09:07:13,350 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-06 09:12:31,328 (trainer:732) INFO: 16epoch:train:7501-7600batch: iter_time=1.366, forward_time=0.109, loss_ctc=70.718, loss_att=46.675, acc=0.709, loss=53.888, backward_time=0.765, grad_norm=84.044, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.113, optim0_lr0=9.204e-05, train_time=7.017 +[gpua003:0/64] 2023-07-06 09:14:11,524 (trainer:732) INFO: 16epoch:train:7601-7700batch: iter_time=1.096e-04, forward_time=0.109, loss_ctc=71.556, loss_att=49.715, acc=0.711, loss=56.267, backward_time=0.752, grad_norm=84.595, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.113, optim0_lr0=9.201e-05, train_time=2.004 +[gpua003:0/64] 2023-07-06 09:15:55,971 (trainer:732) INFO: 16epoch:train:7701-7800batch: iter_time=1.048e-04, forward_time=0.110, loss_ctc=63.324, loss_att=50.467, acc=0.700, loss=54.324, backward_time=0.762, grad_norm=79.834, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.113, optim0_lr0=9.198e-05, train_time=2.089 +[gpua003:0/64] 2023-07-06 09:17:38,069 (trainer:732) INFO: 16epoch:train:7801-7900batch: iter_time=1.107e-04, forward_time=0.109, loss_ctc=74.072, loss_att=55.493, acc=0.692, loss=61.067, backward_time=0.764, grad_norm=89.346, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.113, optim0_lr0=9.195e-05, train_time=2.042 +[gpua003:0/64] 2023-07-06 09:19:24,407 (trainer:732) INFO: 16epoch:train:7901-8000batch: iter_time=1.124e-04, forward_time=0.109, loss_ctc=63.418, loss_att=47.702, acc=0.707, loss=52.417, backward_time=0.766, grad_norm=74.133, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.113, optim0_lr0=9.192e-05, train_time=2.127 +[gpua003:0/64] 2023-07-06 09:21:04,320 (trainer:732) INFO: 16epoch:train:8001-8100batch: iter_time=1.101e-04, forward_time=0.109, loss_ctc=80.173, loss_att=56.871, acc=0.698, loss=63.862, backward_time=0.751, grad_norm=90.058, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.112, optim0_lr0=9.189e-05, train_time=1.998 +[gpua003:0/64] 2023-07-06 09:22:51,435 (trainer:732) INFO: 16epoch:train:8101-8200batch: iter_time=1.128e-04, forward_time=0.109, loss_ctc=82.878, loss_att=61.648, acc=0.703, loss=68.017, backward_time=0.757, grad_norm=96.301, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.112, optim0_lr0=9.185e-05, train_time=2.142 +[gpua003:0/64] 2023-07-06 09:24:32,016 (trainer:732) INFO: 16epoch:train:8201-8300batch: iter_time=1.174e-04, forward_time=0.109, loss_ctc=70.061, loss_att=57.659, acc=0.693, loss=61.380, backward_time=0.750, grad_norm=90.693, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.112, optim0_lr0=9.182e-05, 
train_time=2.011 +[gpua003:0/64] 2023-07-06 09:25:08,632 (multiple_iter_factory:32) INFO: Building 10th iter-factory... +[gpua003:0/64] 2023-07-06 09:25:27,808 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-06 09:25:31,292 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-06 09:25:31,292 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0, +[gpua003:0/64] 2023-07-06 09:25:31,298 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-06 09:30:43,049 (trainer:732) INFO: 16epoch:train:8301-8400batch: iter_time=1.323, forward_time=0.109, loss_ctc=78.228, loss_att=55.459, acc=0.696, loss=62.290, backward_time=0.766, grad_norm=102.805, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.112, optim0_lr0=9.179e-05, train_time=7.420 +[gpua003:0/64] 2023-07-06 09:32:24,140 (trainer:732) INFO: 16epoch:train:8401-8500batch: iter_time=1.051e-04, forward_time=0.107, loss_ctc=65.900, loss_att=49.152, acc=0.721, loss=54.176, backward_time=0.753, grad_norm=86.699, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.112, optim0_lr0=9.176e-05, train_time=2.022 +[gpua003:0/64] 2023-07-06 09:34:03,912 (trainer:732) INFO: 16epoch:train:8501-8600batch: iter_time=9.517e-05, forward_time=0.107, loss_ctc=64.248, loss_att=49.488, acc=0.696, loss=53.916, backward_time=0.752, grad_norm=92.458, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.112, optim0_lr0=9.173e-05, train_time=1.995 +[gpua003:0/64] 2023-07-06 09:35:44,278 (trainer:732) INFO: 16epoch:train:8601-8700batch: iter_time=1.134e-04, forward_time=0.109, loss_ctc=68.813, loss_att=50.173, acc=0.693, loss=55.765, backward_time=0.753, grad_norm=79.140, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.112, optim0_lr0=9.170e-05, train_time=2.007 +[gpua003:0/64] 2023-07-06 09:37:24,339 (trainer:732) INFO: 16epoch:train:8701-8800batch: iter_time=1.014e-04, forward_time=0.107, loss_ctc=63.927, loss_att=49.180, acc=0.703, loss=53.604, backward_time=0.752, grad_norm=74.079, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.112, optim0_lr0=9.167e-05, train_time=2.001 +[gpua003:0/64] 2023-07-06 09:39:04,178 (trainer:732) INFO: 16epoch:train:8801-8900batch: iter_time=9.886e-05, forward_time=0.107, loss_ctc=71.856, loss_att=54.471, acc=0.690, loss=59.686, backward_time=0.751, grad_norm=84.227, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.112, optim0_lr0=9.164e-05, train_time=1.997 +[gpua003:0/64] 2023-07-06 09:40:44,106 (trainer:732) INFO: 16epoch:train:8901-9000batch: iter_time=9.990e-05, forward_time=0.108, loss_ctc=83.183, loss_att=60.219, acc=0.697, loss=67.108, backward_time=0.752, grad_norm=105.843, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.112, optim0_lr0=9.161e-05, train_time=1.998 +[gpua003:0/64] 2023-07-06 09:42:23,729 (trainer:732) INFO: 16epoch:train:9001-9100batch: iter_time=1.063e-04, forward_time=0.107, loss_ctc=73.879, loss_att=62.193, acc=0.675, loss=65.699, 
backward_time=0.750, grad_norm=87.852, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.112, optim0_lr0=9.158e-05, train_time=1.992 +[gpua003:0/64] 2023-07-06 09:43:30,780 (multiple_iter_factory:32) INFO: Building 11th iter-factory... +[gpua003:0/64] 2023-07-06 09:43:50,013 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-06 09:43:53,960 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-06 09:43:53,960 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9, +[gpua003:0/64] 2023-07-06 09:43:53,967 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-06 09:47:02,373 (trainer:732) INFO: 16epoch:train:9101-9200batch: iter_time=1.335, forward_time=0.108, loss_ctc=73.193, loss_att=55.038, acc=0.695, loss=60.485, backward_time=0.764, grad_norm=100.991, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.113, optim0_lr0=9.155e-05, train_time=5.573 +[gpua003:0/64] 2023-07-06 09:48:42,805 (trainer:732) INFO: 16epoch:train:9201-9300batch: iter_time=1.082e-04, forward_time=0.108, loss_ctc=70.916, loss_att=49.501, acc=0.717, loss=55.926, backward_time=0.755, grad_norm=111.947, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.113, optim0_lr0=9.152e-05, train_time=2.008 +[gpua003:0/64] 2023-07-06 09:50:23,695 (trainer:732) INFO: 16epoch:train:9301-9400batch: iter_time=1.008e-04, forward_time=0.109, loss_ctc=65.130, loss_att=48.335, acc=0.700, loss=53.374, backward_time=0.753, grad_norm=76.035, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.112, optim0_lr0=9.148e-05, train_time=2.018 +[gpua003:0/64] 2023-07-06 09:52:03,613 (trainer:732) INFO: 16epoch:train:9401-9500batch: iter_time=1.046e-04, forward_time=0.108, loss_ctc=69.151, loss_att=50.398, acc=0.699, loss=56.024, backward_time=0.752, grad_norm=82.318, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.111, optim0_lr0=9.145e-05, train_time=1.998 +[gpua003:0/64] 2023-07-06 09:53:43,866 (trainer:732) INFO: 16epoch:train:9501-9600batch: iter_time=9.826e-05, forward_time=0.108, loss_ctc=64.096, loss_att=50.574, acc=0.700, loss=54.631, backward_time=0.752, grad_norm=76.082, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.112, optim0_lr0=9.142e-05, train_time=2.005 +[gpua003:0/64] 2023-07-06 09:55:23,809 (trainer:732) INFO: 16epoch:train:9601-9700batch: iter_time=1.055e-04, forward_time=0.107, loss_ctc=68.502, loss_att=50.633, acc=0.705, loss=55.994, backward_time=0.752, grad_norm=90.453, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.113, optim0_lr0=9.139e-05, train_time=1.999 +[gpua003:0/64] 2023-07-06 09:57:03,795 (trainer:732) INFO: 16epoch:train:9701-9800batch: iter_time=1.007e-04, forward_time=0.108, loss_ctc=83.554, loss_att=60.263, acc=0.706, loss=67.250, backward_time=0.752, grad_norm=121.058, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.113, optim0_lr0=9.136e-05, train_time=1.999 +[gpua003:0/64] 2023-07-06 09:58:43,540 (trainer:732) INFO: 
16epoch:train:9801-9900batch: iter_time=1.058e-04, forward_time=0.107, loss_ctc=75.767, loss_att=61.630, acc=0.686, loss=65.871, backward_time=0.752, grad_norm=99.766, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.113, optim0_lr0=9.133e-05, train_time=1.995 +[gpua003:0/64] 2023-07-06 10:00:23,449 (trainer:732) INFO: 16epoch:train:9901-10000batch: iter_time=1.125e-04, forward_time=0.109, loss_ctc=73.492, loss_att=62.177, acc=0.702, loss=65.572, backward_time=0.752, grad_norm=87.104, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.113, optim0_lr0=9.130e-05, train_time=1.998 +[gpua003:0/64] 2023-07-06 10:13:27,455 (trainer:338) INFO: 16epoch results: [train] iter_time=0.188, forward_time=0.109, loss_ctc=72.648, loss_att=54.689, acc=0.694, loss=60.077, backward_time=0.755, grad_norm=91.310, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.113, optim0_lr0=9.286e-05, train_time=2.587, time=3 hours, 35 minutes and 51.24 seconds, total_count=130000, gpu_max_cached_mem_GB=37.775, [valid] loss_ctc=53.067, cer_ctc=0.290, loss_att=44.062, acc=0.664, cer=0.358, wer=0.991, loss=46.763, time=6 minutes and 19.52 seconds, total_count=13662, gpu_max_cached_mem_GB=37.775, [att_plot] time=6 minutes and 30.6 seconds, total_count=0, gpu_max_cached_mem_GB=37.775 +[gpua003:0/64] 2023-07-06 10:13:43,422 (trainer:386) INFO: The best model has been updated: valid.acc, valid.total_count +[gpua003:0/64] 2023-07-06 10:13:43,465 (trainer:440) INFO: The model files were removed: exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/11epoch.pth +[gpua003:0/64] 2023-07-06 10:13:43,465 (trainer:272) INFO: 17/100epoch started. Estimated time to finish: 1 week, 6 days and 10 hours +[gpua003:0/64] 2023-07-06 10:13:43,501 (multiple_iter_factory:32) INFO: Building 0th iter-factory... 
+[gpua003:0/64] 2023-07-06 10:14:02,307 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-06 10:14:05,803 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-06 10:14:05,804 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7, +[gpua003:0/64] 2023-07-06 10:14:05,819 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-06 10:20:06,308 (trainer:732) INFO: 17epoch:train:1-100batch: iter_time=2.583, forward_time=0.213, loss_ctc=70.673, loss_att=58.961, acc=0.684, loss=62.475, backward_time=0.820, grad_norm=95.879, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.123, optim0_lr0=9.127e-05, train_time=7.656 +[gpua003:0/64] 2023-07-06 10:21:47,983 (trainer:732) INFO: 17epoch:train:101-200batch: iter_time=1.078e-04, forward_time=0.110, loss_ctc=72.857, loss_att=65.701, acc=0.683, loss=67.848, backward_time=0.753, grad_norm=87.783, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.113, optim0_lr0=9.124e-05, train_time=2.034 +[gpua003:0/64] 2023-07-06 10:23:43,718 (trainer:732) INFO: 17epoch:train:201-300batch: iter_time=3.269e-04, forward_time=0.202, loss_ctc=74.298, loss_att=62.216, acc=0.684, loss=65.841, backward_time=0.776, grad_norm=85.905, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.119, optim0_lr0=9.121e-05, train_time=2.314 +[gpua003:0/64] 2023-07-06 10:25:26,949 (trainer:732) INFO: 17epoch:train:301-400batch: iter_time=9.790e-05, forward_time=0.108, loss_ctc=72.162, loss_att=54.531, acc=0.685, loss=59.820, backward_time=0.756, grad_norm=86.861, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.113, optim0_lr0=9.118e-05, train_time=2.064 +[gpua003:0/64] 2023-07-06 10:27:13,540 (trainer:732) INFO: 17epoch:train:401-500batch: iter_time=1.134e-04, forward_time=0.120, loss_ctc=74.989, loss_att=52.743, acc=0.701, loss=59.416, backward_time=0.763, grad_norm=88.929, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.117, optim0_lr0=9.115e-05, train_time=2.132 +[gpua003:0/64] 2023-07-06 10:29:07,314 (trainer:732) INFO: 17epoch:train:501-600batch: iter_time=0.006, forward_time=0.165, loss_ctc=74.139, loss_att=57.626, acc=0.683, loss=62.580, backward_time=0.774, grad_norm=89.735, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.116, optim0_lr0=9.112e-05, train_time=2.275 +[gpua003:0/64] 2023-07-06 10:31:07,776 (trainer:732) INFO: 17epoch:train:601-700batch: iter_time=9.955e-05, forward_time=0.110, loss_ctc=75.682, loss_att=59.754, acc=0.684, loss=64.533, backward_time=0.796, grad_norm=95.065, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.112, optim0_lr0=9.109e-05, train_time=2.409 +[gpua003:0/64] 2023-07-06 10:33:08,425 (trainer:732) INFO: 17epoch:train:701-800batch: iter_time=0.001, forward_time=0.197, loss_ctc=73.468, loss_att=56.545, acc=0.693, loss=61.622, backward_time=0.799, grad_norm=89.781, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.117, optim0_lr0=9.106e-05, train_time=2.411 
+[gpua003:0/64] 2023-07-06 10:33:52,355 (multiple_iter_factory:32) INFO: Building 1th iter-factory... +[gpua003:0/64] 2023-07-06 10:34:11,454 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-06 10:34:14,921 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-06 10:34:14,921 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4, +[gpua003:0/64] 2023-07-06 10:34:14,928 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-06 10:38:42,065 (trainer:732) INFO: 17epoch:train:801-900batch: iter_time=1.652, forward_time=0.108, loss_ctc=78.264, loss_att=59.917, acc=0.689, loss=65.421, backward_time=0.771, grad_norm=105.575, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.112, optim0_lr0=9.103e-05, train_time=6.674 +[gpua003:0/64] 2023-07-06 10:40:21,942 (trainer:732) INFO: 17epoch:train:901-1000batch: iter_time=9.108e-05, forward_time=0.107, loss_ctc=68.668, loss_att=64.753, acc=0.669, loss=65.927, backward_time=0.750, grad_norm=89.386, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.112, optim0_lr0=9.100e-05, train_time=1.997 +[gpua003:0/64] 2023-07-06 10:42:01,601 (trainer:732) INFO: 17epoch:train:1001-1100batch: iter_time=9.405e-05, forward_time=0.107, loss_ctc=73.564, loss_att=60.516, acc=0.685, loss=64.430, backward_time=0.750, grad_norm=93.150, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.112, optim0_lr0=9.097e-05, train_time=1.993 +[gpua003:0/64] 2023-07-06 10:43:41,772 (trainer:732) INFO: 17epoch:train:1101-1200batch: iter_time=9.308e-05, forward_time=0.107, loss_ctc=70.639, loss_att=56.045, acc=0.686, loss=60.423, backward_time=0.751, grad_norm=87.677, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.112, optim0_lr0=9.094e-05, train_time=2.003 +[gpua003:0/64] 2023-07-06 10:45:21,649 (trainer:732) INFO: 17epoch:train:1201-1300batch: iter_time=9.487e-05, forward_time=0.107, loss_ctc=71.051, loss_att=55.501, acc=0.686, loss=60.166, backward_time=0.752, grad_norm=91.712, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.112, optim0_lr0=9.091e-05, train_time=1.997 +[gpua003:0/64] 2023-07-06 10:47:01,483 (trainer:732) INFO: 17epoch:train:1301-1400batch: iter_time=9.825e-05, forward_time=0.108, loss_ctc=78.942, loss_att=56.523, acc=0.684, loss=63.249, backward_time=0.752, grad_norm=90.368, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.113, optim0_lr0=9.088e-05, train_time=1.996 +[gpua003:0/64] 2023-07-06 10:48:41,812 (trainer:732) INFO: 17epoch:train:1401-1500batch: iter_time=1.015e-04, forward_time=0.114, loss_ctc=76.055, loss_att=56.659, acc=0.684, loss=62.478, backward_time=0.752, grad_norm=88.888, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.113, optim0_lr0=9.085e-05, train_time=2.006 +[gpua003:0/64] 2023-07-06 10:50:21,503 (trainer:732) INFO: 17epoch:train:1501-1600batch: iter_time=1.011e-04, forward_time=0.106, loss_ctc=72.092, loss_att=59.329, acc=0.681, loss=63.157, backward_time=0.751, 
grad_norm=83.857, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.113, optim0_lr0=9.082e-05, train_time=1.994 +[gpua003:0/64] 2023-07-06 10:51:28,539 (multiple_iter_factory:32) INFO: Building 2th iter-factory... +[gpua003:0/64] 2023-07-06 10:51:47,761 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-06 10:51:51,243 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-06 10:51:51,244 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1, +[gpua003:0/64] 2023-07-06 10:51:51,250 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-06 10:55:23,750 (trainer:732) INFO: 17epoch:train:1601-1700batch: iter_time=1.339, forward_time=0.136, loss_ctc=75.727, loss_att=56.568, acc=0.694, loss=62.316, backward_time=0.762, grad_norm=98.984, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.114, optim0_lr0=9.079e-05, train_time=6.043 +[gpua003:0/64] 2023-07-06 10:57:04,140 (trainer:732) INFO: 17epoch:train:1701-1800batch: iter_time=1.070e-04, forward_time=0.110, loss_ctc=65.293, loss_att=58.713, acc=0.683, loss=60.687, backward_time=0.755, grad_norm=89.446, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.113, optim0_lr0=9.076e-05, train_time=2.009 +[gpua003:0/64] 2023-07-06 10:58:44,084 (trainer:732) INFO: 17epoch:train:1801-1900batch: iter_time=1.046e-04, forward_time=0.108, loss_ctc=74.486, loss_att=67.076, acc=0.685, loss=69.299, backward_time=0.753, grad_norm=99.307, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.113, optim0_lr0=9.073e-05, train_time=1.999 +[gpua003:0/64] 2023-07-06 11:00:24,124 (trainer:732) INFO: 17epoch:train:1901-2000batch: iter_time=1.090e-04, forward_time=0.108, loss_ctc=74.341, loss_att=60.362, acc=0.692, loss=64.556, backward_time=0.753, grad_norm=87.026, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.113, optim0_lr0=9.070e-05, train_time=2.001 +[gpua003:0/64] 2023-07-06 11:02:04,044 (trainer:732) INFO: 17epoch:train:2001-2100batch: iter_time=1.014e-04, forward_time=0.108, loss_ctc=66.178, loss_att=49.767, acc=0.697, loss=54.690, backward_time=0.753, grad_norm=86.555, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.113, optim0_lr0=9.067e-05, train_time=1.998 +[gpua003:0/64] 2023-07-06 11:03:43,843 (trainer:732) INFO: 17epoch:train:2101-2200batch: iter_time=9.901e-05, forward_time=0.109, loss_ctc=77.231, loss_att=58.176, acc=0.697, loss=63.892, backward_time=0.752, grad_norm=93.954, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.113, optim0_lr0=9.064e-05, train_time=1.996 +[gpua003:0/64] 2023-07-06 11:05:23,555 (trainer:732) INFO: 17epoch:train:2201-2300batch: iter_time=9.750e-05, forward_time=0.108, loss_ctc=75.802, loss_att=57.441, acc=0.693, loss=62.949, backward_time=0.751, grad_norm=89.231, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.113, optim0_lr0=9.061e-05, train_time=1.994 +[gpua003:0/64] 2023-07-06 11:07:03,243 (trainer:732) INFO: 17epoch:train:2301-2400batch: 
iter_time=1.062e-04, forward_time=0.108, loss_ctc=70.913, loss_att=52.438, acc=0.689, loss=57.980, backward_time=0.751, grad_norm=110.095, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.113, optim0_lr0=9.058e-05, train_time=1.994 +[gpua003:0/64] 2023-07-06 11:09:01,745 (multiple_iter_factory:32) INFO: Building 3th iter-factory... +[gpua003:0/64] 2023-07-06 11:09:21,136 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-06 11:09:24,685 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-06 11:09:24,685 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0, +[gpua003:0/64] 2023-07-06 11:09:24,692 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-06 11:12:11,094 (trainer:732) INFO: 17epoch:train:2401-2500batch: iter_time=1.565, forward_time=0.109, loss_ctc=75.267, loss_att=58.496, acc=0.696, loss=63.527, backward_time=0.755, grad_norm=104.336, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.113, optim0_lr0=9.055e-05, train_time=6.157 +[gpua003:0/64] 2023-07-06 11:13:53,368 (trainer:732) INFO: 17epoch:train:2501-2600batch: iter_time=1.102e-04, forward_time=0.108, loss_ctc=70.363, loss_att=57.552, acc=0.686, loss=61.395, backward_time=0.759, grad_norm=92.372, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.112, optim0_lr0=9.052e-05, train_time=2.045 +[gpua003:0/64] 2023-07-06 11:15:33,128 (trainer:732) INFO: 17epoch:train:2601-2700batch: iter_time=1.054e-04, forward_time=0.108, loss_ctc=71.407, loss_att=65.980, acc=0.682, loss=67.608, backward_time=0.750, grad_norm=88.759, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.112, optim0_lr0=9.049e-05, train_time=1.995 +[gpua003:0/64] 2023-07-06 11:17:13,072 (trainer:732) INFO: 17epoch:train:2701-2800batch: iter_time=1.124e-04, forward_time=0.108, loss_ctc=73.031, loss_att=60.033, acc=0.686, loss=63.933, backward_time=0.751, grad_norm=87.774, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.112, optim0_lr0=9.046e-05, train_time=1.999 +[gpua003:0/64] 2023-07-06 11:18:52,468 (trainer:732) INFO: 17epoch:train:2801-2900batch: iter_time=1.132e-04, forward_time=0.106, loss_ctc=70.091, loss_att=54.398, acc=0.683, loss=59.106, backward_time=0.749, grad_norm=91.603, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.112, optim0_lr0=9.043e-05, train_time=1.988 +[gpua003:0/64] 2023-07-06 11:20:32,129 (trainer:732) INFO: 17epoch:train:2901-3000batch: iter_time=1.165e-04, forward_time=0.108, loss_ctc=74.247, loss_att=52.240, acc=0.696, loss=58.842, backward_time=0.750, grad_norm=94.573, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.112, optim0_lr0=9.040e-05, train_time=1.993 +[gpua003:0/64] 2023-07-06 11:22:11,683 (trainer:732) INFO: 17epoch:train:3001-3100batch: iter_time=1.201e-04, forward_time=0.107, loss_ctc=73.832, loss_att=58.188, acc=0.682, loss=62.881, backward_time=0.749, grad_norm=91.783, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.112, 
optim0_lr0=9.037e-05, train_time=1.991 +[gpua003:0/64] 2023-07-06 11:23:51,407 (trainer:732) INFO: 17epoch:train:3101-3200batch: iter_time=1.178e-04, forward_time=0.108, loss_ctc=72.306, loss_att=57.425, acc=0.683, loss=61.889, backward_time=0.751, grad_norm=96.037, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.112, optim0_lr0=9.034e-05, train_time=1.994 +[gpua003:0/64] 2023-07-06 11:25:30,906 (trainer:732) INFO: 17epoch:train:3201-3300batch: iter_time=1.122e-04, forward_time=0.107, loss_ctc=73.002, loss_att=56.253, acc=0.694, loss=61.278, backward_time=0.750, grad_norm=94.698, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.112, optim0_lr0=9.031e-05, train_time=1.990 +[gpua003:0/64] 2023-07-06 11:26:04,198 (multiple_iter_factory:32) INFO: Building 4th iter-factory... +[gpua003:0/64] 2023-07-06 11:26:23,282 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-06 11:26:26,814 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-06 11:26:26,814 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5, +[gpua003:0/64] 2023-07-06 11:26:26,821 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-06 11:32:52,064 (trainer:732) INFO: 17epoch:train:3301-3400batch: iter_time=1.297, forward_time=0.107, loss_ctc=74.660, loss_att=57.798, acc=0.692, loss=62.857, backward_time=0.766, grad_norm=124.515, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.112, optim0_lr0=9.028e-05, train_time=8.823 +[gpua003:0/64] 2023-07-06 11:34:32,228 (trainer:732) INFO: 17epoch:train:3401-3500batch: iter_time=1.056e-04, forward_time=0.108, loss_ctc=67.842, loss_att=63.769, acc=0.687, loss=64.991, backward_time=0.753, grad_norm=85.874, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.112, optim0_lr0=9.025e-05, train_time=2.003 +[gpua003:0/64] 2023-07-06 11:36:12,063 (trainer:732) INFO: 17epoch:train:3501-3600batch: iter_time=9.523e-05, forward_time=0.108, loss_ctc=71.756, loss_att=59.431, acc=0.693, loss=63.129, backward_time=0.752, grad_norm=81.853, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.113, optim0_lr0=9.022e-05, train_time=1.996 +[gpua003:0/64] 2023-07-06 11:37:52,081 (trainer:732) INFO: 17epoch:train:3601-3700batch: iter_time=8.824e-05, forward_time=0.108, loss_ctc=71.855, loss_att=55.535, acc=0.697, loss=60.431, backward_time=0.752, grad_norm=82.300, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.113, optim0_lr0=9.020e-05, train_time=2.000 +[gpua003:0/64] 2023-07-06 11:39:37,163 (trainer:732) INFO: 17epoch:train:3701-3800batch: iter_time=9.169e-05, forward_time=0.108, loss_ctc=69.667, loss_att=54.813, acc=0.695, loss=59.269, backward_time=0.759, grad_norm=90.155, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.112, optim0_lr0=9.017e-05, train_time=2.101 +[gpua003:0/64] 2023-07-06 11:41:16,991 (trainer:732) INFO: 17epoch:train:3801-3900batch: iter_time=9.824e-05, forward_time=0.107, loss_ctc=77.697, loss_att=55.817, 
acc=0.700, loss=62.381, backward_time=0.751, grad_norm=89.894, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.112, optim0_lr0=9.014e-05, train_time=1.996 +[gpua003:0/64] 2023-07-06 11:42:56,825 (trainer:732) INFO: 17epoch:train:3901-4000batch: iter_time=9.875e-05, forward_time=0.107, loss_ctc=73.227, loss_att=54.909, acc=0.698, loss=60.404, backward_time=0.751, grad_norm=98.795, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.112, optim0_lr0=9.011e-05, train_time=1.996 +[gpua003:0/64] 2023-07-06 11:44:38,362 (trainer:732) INFO: 17epoch:train:4001-4100batch: iter_time=1.006e-04, forward_time=0.107, loss_ctc=72.543, loss_att=58.175, acc=0.689, loss=62.485, backward_time=0.755, grad_norm=89.796, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.112, optim0_lr0=9.008e-05, train_time=2.031 +[gpua003:0/64] 2023-07-06 11:45:44,663 (multiple_iter_factory:32) INFO: Building 5th iter-factory... +[gpua003:0/64] 2023-07-06 11:46:03,824 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-06 11:46:07,363 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-06 11:46:07,363 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8, +[gpua003:0/64] 2023-07-06 11:46:07,369 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-06 11:51:11,190 (trainer:732) INFO: 17epoch:train:4101-4200batch: iter_time=1.302, forward_time=0.107, loss_ctc=75.093, loss_att=55.212, acc=0.696, loss=61.176, backward_time=0.773, grad_norm=88.273, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.112, optim0_lr0=9.005e-05, train_time=7.856 +[gpua003:0/64] 2023-07-06 11:52:51,935 (trainer:732) INFO: 17epoch:train:4201-4300batch: iter_time=9.852e-05, forward_time=0.108, loss_ctc=66.394, loss_att=59.086, acc=0.681, loss=61.279, backward_time=0.754, grad_norm=84.241, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.112, optim0_lr0=9.002e-05, train_time=2.015 +[gpua003:0/64] 2023-07-06 11:54:31,640 (trainer:732) INFO: 17epoch:train:4301-4400batch: iter_time=9.413e-05, forward_time=0.107, loss_ctc=71.953, loss_att=65.497, acc=0.672, loss=67.434, backward_time=0.751, grad_norm=102.244, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.112, optim0_lr0=8.999e-05, train_time=1.994 +[gpua003:0/64] 2023-07-06 11:56:11,417 (trainer:732) INFO: 17epoch:train:4401-4500batch: iter_time=9.684e-05, forward_time=0.107, loss_ctc=72.922, loss_att=58.511, acc=0.684, loss=62.834, backward_time=0.751, grad_norm=97.141, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.112, optim0_lr0=8.996e-05, train_time=1.995 +[gpua003:0/64] 2023-07-06 11:57:51,231 (trainer:732) INFO: 17epoch:train:4501-4600batch: iter_time=1.040e-04, forward_time=0.107, loss_ctc=70.213, loss_att=52.171, acc=0.696, loss=57.583, backward_time=0.751, grad_norm=81.295, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.112, optim0_lr0=8.993e-05, train_time=1.996 +[gpua003:0/64] 2023-07-06 11:59:30,948 
(trainer:732) INFO: 17epoch:train:4601-4700batch: iter_time=1.024e-04, forward_time=0.107, loss_ctc=80.215, loss_att=60.929, acc=0.678, loss=66.715, backward_time=0.751, grad_norm=101.501, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.112, optim0_lr0=8.990e-05, train_time=1.994 +[gpua003:0/64] 2023-07-06 12:01:10,656 (trainer:732) INFO: 17epoch:train:4701-4800batch: iter_time=1.032e-04, forward_time=0.107, loss_ctc=72.860, loss_att=54.463, acc=0.685, loss=59.982, backward_time=0.750, grad_norm=86.781, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.112, optim0_lr0=8.987e-05, train_time=1.994 +[gpua003:0/64] 2023-07-06 12:02:50,263 (trainer:732) INFO: 17epoch:train:4801-4900batch: iter_time=1.006e-04, forward_time=0.107, loss_ctc=72.023, loss_att=55.370, acc=0.691, loss=60.366, backward_time=0.751, grad_norm=80.330, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.112, optim0_lr0=8.985e-05, train_time=1.992 +[gpua003:0/64] 2023-07-06 12:04:30,172 (multiple_iter_factory:32) INFO: Building 6th iter-factory... +[gpua003:0/64] 2023-07-06 12:04:49,660 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-06 12:04:53,209 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-06 12:04:53,209 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2, +[gpua003:0/64] 2023-07-06 12:04:53,232 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-06 12:09:03,264 (trainer:732) INFO: 17epoch:train:4901-5000batch: iter_time=2.218, forward_time=0.107, loss_ctc=69.690, loss_att=56.365, acc=0.679, loss=60.363, backward_time=0.760, grad_norm=86.852, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.112, optim0_lr0=8.982e-05, train_time=7.460 +[gpua003:0/64] 2023-07-06 12:10:45,243 (trainer:732) INFO: 17epoch:train:5001-5100batch: iter_time=9.156e-05, forward_time=0.108, loss_ctc=69.222, loss_att=56.473, acc=0.696, loss=60.298, backward_time=0.760, grad_norm=92.071, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.113, optim0_lr0=8.979e-05, train_time=2.039 +[gpua003:0/64] 2023-07-06 12:12:25,764 (trainer:732) INFO: 17epoch:train:5101-5200batch: iter_time=1.022e-04, forward_time=0.108, loss_ctc=70.202, loss_att=64.051, acc=0.686, loss=65.896, backward_time=0.754, grad_norm=83.538, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.113, optim0_lr0=8.976e-05, train_time=2.010 +[gpua003:0/64] 2023-07-06 12:14:28,497 (trainer:732) INFO: 17epoch:train:5201-5300batch: iter_time=1.115e-04, forward_time=0.108, loss_ctc=72.332, loss_att=60.244, acc=0.687, loss=63.870, backward_time=0.813, grad_norm=91.528, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.113, optim0_lr0=8.973e-05, train_time=2.454 +[gpua003:0/64] 2023-07-06 12:16:08,436 (trainer:732) INFO: 17epoch:train:5301-5400batch: iter_time=8.814e-05, forward_time=0.109, loss_ctc=69.588, loss_att=52.565, acc=0.687, loss=57.672, backward_time=0.753, grad_norm=77.684, clip=100.000, 
loss_scale=1.801e+16, optim_step_time=0.114, optim0_lr0=8.970e-05, train_time=1.999 +[gpua003:0/64] 2023-07-06 12:17:52,763 (trainer:732) INFO: 17epoch:train:5401-5500batch: iter_time=8.935e-05, forward_time=0.109, loss_ctc=73.912, loss_att=52.422, acc=0.696, loss=58.869, backward_time=0.776, grad_norm=84.516, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.114, optim0_lr0=8.967e-05, train_time=2.086 +[gpua003:0/64] 2023-07-06 12:19:32,754 (trainer:732) INFO: 17epoch:train:5501-5600batch: iter_time=8.392e-05, forward_time=0.108, loss_ctc=73.547, loss_att=57.855, acc=0.682, loss=62.562, backward_time=0.753, grad_norm=96.965, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.114, optim0_lr0=8.964e-05, train_time=2.000 +[gpua003:0/64] 2023-07-06 12:21:12,375 (trainer:732) INFO: 17epoch:train:5601-5700batch: iter_time=1.111e-04, forward_time=0.108, loss_ctc=71.195, loss_att=56.412, acc=0.687, loss=60.847, backward_time=0.751, grad_norm=116.013, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.113, optim0_lr0=8.961e-05, train_time=1.992 +[gpua003:0/64] 2023-07-06 12:22:52,067 (trainer:732) INFO: 17epoch:train:5701-5800batch: iter_time=8.850e-05, forward_time=0.108, loss_ctc=71.813, loss_att=55.666, acc=0.695, loss=60.510, backward_time=0.751, grad_norm=115.475, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.113, optim0_lr0=8.959e-05, train_time=1.994 +[gpua003:0/64] 2023-07-06 12:23:25,320 (multiple_iter_factory:32) INFO: Building 7th iter-factory... +[gpua003:0/64] 2023-07-06 12:23:44,500 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-06 12:23:48,062 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-06 12:23:48,062 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3, +[gpua003:0/64] 2023-07-06 12:23:48,068 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-06 12:29:41,148 (trainer:732) INFO: 17epoch:train:5801-5900batch: iter_time=1.954, forward_time=0.110, loss_ctc=75.608, loss_att=57.604, acc=0.698, loss=63.005, backward_time=0.765, grad_norm=95.989, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.114, optim0_lr0=8.956e-05, train_time=8.181 +[gpua003:0/64] 2023-07-06 12:31:32,248 (trainer:732) INFO: 17epoch:train:5901-6000batch: iter_time=1.021e-04, forward_time=0.110, loss_ctc=65.942, loss_att=62.877, acc=0.691, loss=63.796, backward_time=0.780, grad_norm=90.009, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.114, optim0_lr0=8.953e-05, train_time=2.222 +[gpua003:0/64] 2023-07-06 12:33:12,332 (trainer:732) INFO: 17epoch:train:6001-6100batch: iter_time=1.069e-04, forward_time=0.110, loss_ctc=71.278, loss_att=58.934, acc=0.693, loss=62.637, backward_time=0.753, grad_norm=83.756, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.113, optim0_lr0=8.950e-05, train_time=2.001 +[gpua003:0/64] 2023-07-06 12:34:52,381 (trainer:732) INFO: 17epoch:train:6101-6200batch: iter_time=1.058e-04, 
forward_time=0.110, loss_ctc=70.544, loss_att=53.727, acc=0.704, loss=58.772, backward_time=0.753, grad_norm=79.862, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.113, optim0_lr0=8.947e-05, train_time=2.001 +[gpua003:0/64] 2023-07-06 12:36:34,193 (trainer:732) INFO: 17epoch:train:6201-6300batch: iter_time=1.036e-04, forward_time=0.110, loss_ctc=69.284, loss_att=54.871, acc=0.698, loss=59.195, backward_time=0.759, grad_norm=80.439, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.113, optim0_lr0=8.944e-05, train_time=2.036 +[gpua003:0/64] 2023-07-06 12:38:16,533 (trainer:732) INFO: 17epoch:train:6301-6400batch: iter_time=1.054e-04, forward_time=0.110, loss_ctc=76.122, loss_att=54.596, acc=0.703, loss=61.054, backward_time=0.756, grad_norm=92.990, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.113, optim0_lr0=8.941e-05, train_time=2.047 +[gpua003:0/64] 2023-07-06 12:40:03,767 (trainer:732) INFO: 17epoch:train:6401-6500batch: iter_time=1.067e-04, forward_time=0.109, loss_ctc=72.416, loss_att=54.767, acc=0.698, loss=60.062, backward_time=0.760, grad_norm=82.798, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.113, optim0_lr0=8.938e-05, train_time=2.144 +[gpua003:0/64] 2023-07-06 12:41:43,845 (trainer:732) INFO: 17epoch:train:6501-6600batch: iter_time=1.023e-04, forward_time=0.109, loss_ctc=69.309, loss_att=56.242, acc=0.693, loss=60.162, backward_time=0.752, grad_norm=84.956, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.113, optim0_lr0=8.936e-05, train_time=2.001 +[gpua003:0/64] 2023-07-06 12:42:53,074 (multiple_iter_factory:32) INFO: Building 8th iter-factory... +[gpua003:0/64] 2023-07-06 12:43:12,502 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-06 12:43:16,051 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-06 12:43:16,051 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10, +[gpua003:0/64] 2023-07-06 12:43:16,058 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-06 12:48:47,376 (trainer:732) INFO: 17epoch:train:6601-6700batch: iter_time=1.326, forward_time=0.109, loss_ctc=73.679, loss_att=54.146, acc=0.702, loss=60.006, backward_time=0.774, grad_norm=87.071, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.113, optim0_lr0=8.933e-05, train_time=8.470 +[gpua003:0/64] 2023-07-06 12:50:28,471 (trainer:732) INFO: 17epoch:train:6701-6800batch: iter_time=9.504e-05, forward_time=0.109, loss_ctc=66.009, loss_att=56.529, acc=0.690, loss=59.373, backward_time=0.754, grad_norm=86.611, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.113, optim0_lr0=8.930e-05, train_time=2.022 +[gpua003:0/64] 2023-07-06 12:52:08,406 (trainer:732) INFO: 17epoch:train:6801-6900batch: iter_time=9.435e-05, forward_time=0.109, loss_ctc=71.563, loss_att=64.197, acc=0.675, loss=66.407, backward_time=0.752, grad_norm=91.386, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.113, optim0_lr0=8.927e-05, 
train_time=1.998 +[gpua003:0/64] 2023-07-06 12:53:48,230 (trainer:732) INFO: 17epoch:train:6901-7000batch: iter_time=1.083e-04, forward_time=0.108, loss_ctc=70.739, loss_att=56.404, acc=0.690, loss=60.704, backward_time=0.752, grad_norm=77.527, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.113, optim0_lr0=8.924e-05, train_time=1.996 +[gpua003:0/64] 2023-07-06 12:55:28,052 (trainer:732) INFO: 17epoch:train:7001-7100batch: iter_time=1.054e-04, forward_time=0.109, loss_ctc=67.590, loss_att=51.543, acc=0.698, loss=56.357, backward_time=0.752, grad_norm=86.798, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.113, optim0_lr0=8.921e-05, train_time=1.996 +[gpua003:0/64] 2023-07-06 12:57:07,643 (trainer:732) INFO: 17epoch:train:7101-7200batch: iter_time=1.132e-04, forward_time=0.108, loss_ctc=78.963, loss_att=61.659, acc=0.674, loss=66.850, backward_time=0.751, grad_norm=88.590, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.113, optim0_lr0=8.919e-05, train_time=1.992 +[gpua003:0/64] 2023-07-06 12:58:47,336 (trainer:732) INFO: 17epoch:train:7201-7300batch: iter_time=1.007e-04, forward_time=0.109, loss_ctc=71.829, loss_att=53.054, acc=0.688, loss=58.686, backward_time=0.752, grad_norm=99.178, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.112, optim0_lr0=8.916e-05, train_time=1.994 +[gpua003:0/64] 2023-07-06 13:00:27,154 (trainer:732) INFO: 17epoch:train:7301-7400batch: iter_time=9.624e-05, forward_time=0.110, loss_ctc=70.807, loss_att=54.983, acc=0.694, loss=59.730, backward_time=0.752, grad_norm=96.232, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.113, optim0_lr0=8.913e-05, train_time=1.996 +[gpua003:0/64] 2023-07-06 13:02:07,410 (multiple_iter_factory:32) INFO: Building 9th iter-factory... +[gpua003:0/64] 2023-07-06 13:02:26,581 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-06 13:02:30,447 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-06 13:02:30,447 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6, +[gpua003:0/64] 2023-07-06 13:02:30,453 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-06 13:06:49,822 (trainer:732) INFO: 17epoch:train:7401-7500batch: iter_time=1.317, forward_time=0.107, loss_ctc=69.012, loss_att=56.036, acc=0.683, loss=59.929, backward_time=0.766, grad_norm=92.081, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.112, optim0_lr0=8.910e-05, train_time=7.653 +[gpua003:0/64] 2023-07-06 13:08:33,082 (trainer:732) INFO: 17epoch:train:7501-7600batch: iter_time=1.091e-04, forward_time=0.109, loss_ctc=70.374, loss_att=56.800, acc=0.695, loss=60.872, backward_time=0.760, grad_norm=93.389, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.113, optim0_lr0=8.907e-05, train_time=2.065 +[gpua003:0/64] 2023-07-06 13:10:13,369 (trainer:732) INFO: 17epoch:train:7601-7700batch: iter_time=1.035e-04, forward_time=0.109, loss_ctc=71.632, loss_att=65.224, acc=0.684, loss=67.146, 
backward_time=0.753, grad_norm=93.615, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.113, optim0_lr0=8.904e-05, train_time=2.006 +[gpua003:0/64] 2023-07-06 13:11:53,366 (trainer:732) INFO: 17epoch:train:7701-7800batch: iter_time=1.042e-04, forward_time=0.108, loss_ctc=71.948, loss_att=59.571, acc=0.687, loss=63.284, backward_time=0.753, grad_norm=89.530, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.113, optim0_lr0=8.902e-05, train_time=2.000 +[gpua003:0/64] 2023-07-06 13:13:33,022 (trainer:732) INFO: 17epoch:train:7801-7900batch: iter_time=1.008e-04, forward_time=0.106, loss_ctc=71.162, loss_att=53.846, acc=0.687, loss=59.041, backward_time=0.751, grad_norm=93.637, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.112, optim0_lr0=8.899e-05, train_time=1.993 +[gpua003:0/64] 2023-07-06 13:15:12,530 (trainer:732) INFO: 17epoch:train:7901-8000batch: iter_time=1.035e-04, forward_time=0.106, loss_ctc=73.768, loss_att=51.961, acc=0.697, loss=58.503, backward_time=0.750, grad_norm=93.453, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.112, optim0_lr0=8.896e-05, train_time=1.990 +[gpua003:0/64] 2023-07-06 13:16:52,321 (trainer:732) INFO: 17epoch:train:8001-8100batch: iter_time=1.051e-04, forward_time=0.107, loss_ctc=72.484, loss_att=57.703, acc=0.684, loss=62.137, backward_time=0.751, grad_norm=92.203, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.112, optim0_lr0=8.893e-05, train_time=1.996 +[gpua003:0/64] 2023-07-06 13:18:31,917 (trainer:732) INFO: 17epoch:train:8101-8200batch: iter_time=1.031e-04, forward_time=0.106, loss_ctc=69.942, loss_att=55.765, acc=0.690, loss=60.018, backward_time=0.751, grad_norm=85.911, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.112, optim0_lr0=8.890e-05, train_time=1.992 +[gpua003:0/64] 2023-07-06 13:20:16,333 (trainer:732) INFO: 17epoch:train:8201-8300batch: iter_time=1.102e-04, forward_time=0.113, loss_ctc=69.944, loss_att=53.888, acc=0.702, loss=58.705, backward_time=0.762, grad_norm=93.148, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.112, optim0_lr0=8.887e-05, train_time=2.088 +[gpua003:0/64] 2023-07-06 13:20:50,086 (multiple_iter_factory:32) INFO: Building 10th iter-factory... 
+[gpua003:0/64] 2023-07-06 13:21:09,066 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua003:0/64] 2023-07-06 13:21:12,570 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"}
+ preprocess: )
+[gpua003:0/64] 2023-07-06 13:21:12,570 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11,
+[gpua003:0/64] 2023-07-06 13:21:12,576 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpua003:0/64] 2023-07-06 13:25:44,808 (trainer:732) INFO: 17epoch:train:8301-8400batch: iter_time=1.287, forward_time=0.108, loss_ctc=72.637, loss_att=55.648, acc=0.694, loss=60.745, backward_time=0.776, grad_norm=93.234, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.112, optim0_lr0=8.885e-05, train_time=6.569
+[gpua003:0/64] 2023-07-06 13:27:26,151 (trainer:732) INFO: 17epoch:train:8401-8500batch: iter_time=9.020e-05, forward_time=0.108, loss_ctc=71.200, loss_att=66.006, acc=0.691, loss=67.564, backward_time=0.752, grad_norm=84.887, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.113, optim0_lr0=8.882e-05, train_time=2.027
+[gpua003:0/64] 2023-07-06 13:29:06,443 (trainer:732) INFO: 17epoch:train:8501-8600batch: iter_time=9.487e-05, forward_time=0.108, loss_ctc=69.241, loss_att=57.750, acc=0.691, loss=61.197, backward_time=0.752, grad_norm=80.923, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.113, optim0_lr0=8.879e-05, train_time=2.006
+[gpua003:0/64] 2023-07-06 13:30:46,540 (trainer:732) INFO: 17epoch:train:8601-8700batch: iter_time=9.387e-05, forward_time=0.108, loss_ctc=71.052, loss_att=54.608, acc=0.696, loss=59.541, backward_time=0.752, grad_norm=81.533, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.113, optim0_lr0=8.876e-05, train_time=2.002
+[gpua003:0/64] 2023-07-06 13:32:26,237 (trainer:732) INFO: 17epoch:train:8701-8800batch: iter_time=8.946e-05, forward_time=0.107, loss_ctc=68.519, loss_att=53.218, acc=0.703, loss=57.809, backward_time=0.751, grad_norm=87.296, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.113, optim0_lr0=8.873e-05, train_time=1.994
+[gpua003:0/64] 2023-07-06 13:34:06,088 (trainer:732) INFO: 17epoch:train:8801-8900batch: iter_time=9.110e-05, forward_time=0.108, loss_ctc=75.321, loss_att=56.064, acc=0.703, loss=61.841, backward_time=0.752, grad_norm=83.253, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.112, optim0_lr0=8.871e-05, train_time=1.997
+[gpua003:0/64] 2023-07-06 13:35:45,773 (trainer:732) INFO: 17epoch:train:8901-9000batch: iter_time=8.413e-05, forward_time=0.107, loss_ctc=71.840, loss_att=52.405, acc=0.690, loss=58.236, backward_time=0.751, grad_norm=98.759, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.113, optim0_lr0=8.868e-05, train_time=1.993
+[gpua003:0/64] 2023-07-06 13:37:25,621 (trainer:732) INFO: 17epoch:train:9001-9100batch: iter_time=8.465e-05, forward_time=0.107, loss_ctc=67.828, loss_att=57.185, acc=0.695, loss=60.378, backward_time=0.753, grad_norm=87.054, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.113, optim0_lr0=8.865e-05, train_time=1.997
+[gpua003:0/64] 2023-07-06 13:38:32,627 (multiple_iter_factory:32) INFO: Building 11th iter-factory...
+[gpua003:0/64] 2023-07-06 13:38:52,045 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua003:0/64] 2023-07-06 13:38:55,588 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"}
+ preprocess: )
+[gpua003:0/64] 2023-07-06 13:38:55,588 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9,
+[gpua003:0/64] 2023-07-06 13:38:55,594 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpua003:0/64] 2023-07-06 13:43:25,159 (trainer:732) INFO: 17epoch:train:9101-9200batch: iter_time=1.294, forward_time=0.108, loss_ctc=73.928, loss_att=56.389, acc=0.694, loss=61.651, backward_time=0.764, grad_norm=128.846, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.113, optim0_lr0=8.862e-05, train_time=7.191
+[gpua003:0/64] 2023-07-06 13:45:08,132 (trainer:732) INFO: 17epoch:train:9201-9300batch: iter_time=9.071e-05, forward_time=0.106, loss_ctc=66.150, loss_att=58.093, acc=0.695, loss=60.510, backward_time=0.763, grad_norm=85.331, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.112, optim0_lr0=8.860e-05, train_time=2.059
+[gpua003:0/64] 2023-07-06 13:46:48,580 (trainer:732) INFO: 17epoch:train:9301-9400batch: iter_time=9.128e-05, forward_time=0.106, loss_ctc=69.745, loss_att=63.280, acc=0.688, loss=65.219, backward_time=0.751, grad_norm=101.929, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.112, optim0_lr0=8.857e-05, train_time=2.009
+[gpua003:0/64] 2023-07-06 13:48:37,657 (trainer:732) INFO: 17epoch:train:9401-9500batch: iter_time=8.943e-05, forward_time=0.106, loss_ctc=70.169, loss_att=55.042, acc=0.701, loss=59.580, backward_time=0.766, grad_norm=83.719, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.112, optim0_lr0=8.854e-05, train_time=2.181
+[gpua003:0/64] 2023-07-06 13:50:17,323 (trainer:732) INFO: 17epoch:train:9501-9600batch: iter_time=9.534e-05, forward_time=0.106, loss_ctc=68.339, loss_att=52.207, acc=0.704, loss=57.047, backward_time=0.751, grad_norm=77.700, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.112, optim0_lr0=8.851e-05, train_time=1.993
+[gpua003:0/64] 2023-07-06 13:51:57,215 (trainer:732) INFO: 17epoch:train:9601-9700batch: iter_time=9.239e-05, forward_time=0.106, loss_ctc=78.747, loss_att=60.575, acc=0.695, loss=66.026, backward_time=0.751, grad_norm=98.273, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.112, optim0_lr0=8.848e-05, train_time=1.998
+[gpua003:0/64] 2023-07-06 13:53:36,907 (trainer:732) INFO: 17epoch:train:9701-9800batch: iter_time=9.160e-05, forward_time=0.107, loss_ctc=71.624, loss_att=54.061, acc=0.696, loss=59.330, backward_time=0.750, grad_norm=94.224, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.112, optim0_lr0=8.846e-05, train_time=1.994
+[gpua003:0/64] 2023-07-06 13:55:16,555 (trainer:732) INFO: 17epoch:train:9801-9900batch: iter_time=8.975e-05, forward_time=0.106, loss_ctc=70.198, loss_att=52.936, acc=0.705, loss=58.115, backward_time=0.751, grad_norm=84.449, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.112, optim0_lr0=8.843e-05, train_time=1.993
+[gpua003:0/64] 2023-07-06 13:56:56,201 (trainer:732) INFO: 17epoch:train:9901-10000batch: iter_time=8.856e-05, forward_time=0.107, loss_ctc=71.418, loss_att=58.211, acc=0.687, loss=62.173, backward_time=0.750, grad_norm=99.355, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.112, optim0_lr0=8.840e-05, train_time=1.993
+[gpua003:0/64] 2023-07-06 14:09:08,370 (trainer:338) INFO: 17epoch results: [train] iter_time=0.192, forward_time=0.112, loss_ctc=72.034, loss_att=57.326, acc=0.690, loss=61.738, backward_time=0.758, grad_norm=91.386, clip=100.000, loss_scale=2.342e+16, optim_step_time=0.113, optim0_lr0=8.981e-05, train_time=2.678, time=3 hours, 43 minutes and 23.37 seconds, total_count=140000, gpu_max_cached_mem_GB=37.779, [valid] loss_ctc=58.336, cer_ctc=0.306, loss_att=49.239, acc=0.657, cer=0.353, wer=0.988, loss=51.968, time=5 minutes and 55.68 seconds, total_count=14674, gpu_max_cached_mem_GB=37.779, [att_plot] time=6 minutes and 5.85 seconds, total_count=0, gpu_max_cached_mem_GB=37.779
+[gpua003:0/64] 2023-07-06 14:09:27,436 (trainer:386) INFO: The best model has been updated: valid.total_count
+[gpua003:0/64] 2023-07-06 14:09:27,626 (trainer:440) INFO: The model files were removed: exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/12epoch.pth
+[gpua003:0/64] 2023-07-06 14:09:27,714 (trainer:272) INFO: 18/100epoch started. Estimated time to finish: 1 week, 6 days and 8 hours
+[gpua003:0/64] 2023-07-06 14:09:29,063 (multiple_iter_factory:32) INFO: Building 0th iter-factory...
+[gpua003:0/64] 2023-07-06 14:09:48,025 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua003:0/64] 2023-07-06 14:09:53,240 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"}
+ preprocess: )
+[gpua003:0/64] 2023-07-06 14:09:53,240 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7,
+[gpua003:0/64] 2023-07-06 14:09:53,338 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpua003:0/64] 2023-07-06 14:19:30,835 (trainer:732) INFO: 18epoch:train:1-100batch: iter_time=4.929, forward_time=0.153, loss_ctc=76.115, loss_att=63.275, acc=0.682, loss=67.127, backward_time=0.773, grad_norm=114.970, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.116, optim0_lr0=8.837e-05, train_time=12.048
+[gpua003:0/64] 2023-07-06 14:21:10,892 (trainer:732) INFO: 18epoch:train:101-200batch: iter_time=1.040e-04, forward_time=0.108, loss_ctc=78.407, loss_att=64.441, acc=0.685, loss=68.631, backward_time=0.752, grad_norm=128.032, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.113, optim0_lr0=8.835e-05, train_time=2.001
+[gpua003:0/64] 2023-07-06 14:22:53,009 (trainer:732) INFO: 18epoch:train:201-300batch: iter_time=9.913e-05, forward_time=0.108, loss_ctc=66.363, loss_att=49.349, acc=0.706, loss=54.453, backward_time=0.751, grad_norm=82.992, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.113, optim0_lr0=8.832e-05, train_time=2.042
+[gpua003:0/64] 2023-07-06 14:24:32,622 (trainer:732) INFO: 18epoch:train:301-400batch: iter_time=8.736e-05, forward_time=0.107, loss_ctc=85.395, loss_att=58.982, acc=0.692, loss=66.906, backward_time=0.752, grad_norm=105.306, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.113, optim0_lr0=8.829e-05, train_time=1.992
+[gpua003:0/64] 2023-07-06 14:26:14,069 (trainer:732) INFO: 18epoch:train:401-500batch: iter_time=8.796e-05, forward_time=0.107, loss_ctc=74.067, loss_att=58.678, acc=0.674, loss=63.295, backward_time=0.752, grad_norm=98.618, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.113, optim0_lr0=8.826e-05, train_time=2.029
+[gpua003:0/64] 2023-07-06 14:27:54,124 (trainer:732) INFO: 18epoch:train:501-600batch: iter_time=9.281e-05, forward_time=0.107, loss_ctc=85.710, loss_att=69.303, acc=0.665, loss=74.225, backward_time=0.753, grad_norm=104.959, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.113, optim0_lr0=8.824e-05, train_time=2.001
+[gpua003:0/64] 2023-07-06 14:29:33,936 (trainer:732) INFO: 18epoch:train:601-700batch: iter_time=8.948e-05, forward_time=0.107, loss_ctc=68.980, loss_att=52.042, acc=0.680, loss=57.123, backward_time=0.752, grad_norm=124.515, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.113, optim0_lr0=8.821e-05, train_time=1.996
+[gpua003:0/64] 2023-07-06 14:31:21,706 (trainer:732) INFO: 18epoch:train:701-800batch: iter_time=3.960e-04, forward_time=0.142, loss_ctc=86.717, loss_att=65.821, acc=0.661, loss=72.090, backward_time=0.767, grad_norm=105.973, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.113, optim0_lr0=8.818e-05, train_time=2.152
+[gpua003:0/64] 2023-07-06 14:32:09,857 (multiple_iter_factory:32) INFO: Building 1th iter-factory...
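
The "Building Nth iter-factory..." lines above come from ESPnet's multiple-iterator mode (--multiple_iterator true over the 12-way splits12 shards): each epoch visits every shard once in a shuffled order, and each shard's dataset, sampler, and loader are built only when the trainer reaches it. That lazy construction is why the first 100-batch block after every build shows a large iter_time (e.g. 1.287 or 4.929 above) while steady-state blocks sit near iter_time=1e-04. A toy sketch of the idea, not ESPnet's MultipleIterFactory; build_loader is a hypothetical stand-in:

import random

def multiple_iter_factory(shards, epoch_seed):
    # Visit every shard exactly once per epoch, in a freshly shuffled order,
    # mirroring how split.7, split.1, split.5, ... each appear once per epoch.
    order = list(shards)
    random.Random(epoch_seed).shuffle(order)
    for i, shard in enumerate(order):
        print(f"Building {i}th iter-factory... ({shard})")
        # Built lazily here, so the first minibatch drawn from this shard
        # pays the construction cost that shows up as the iter_time spike.
        yield from build_loader(shard)

def build_loader(shard):
    # Hypothetical stand-in for the dataset + batch sampler + loader setup.
    return iter(())

for batch in multiple_iter_factory([f"split.{i}" for i in range(12)], epoch_seed=18):
    pass
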
+[gpua003:0/64] 2023-07-06 14:32:28,641 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua003:0/64] 2023-07-06 14:32:32,360 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"}
+ preprocess: )
+[gpua003:0/64] 2023-07-06 14:32:32,360 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1,
+[gpua003:0/64] 2023-07-06 14:32:32,366 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpua003:0/64] 2023-07-06 14:36:07,379 (trainer:732) INFO: 18epoch:train:801-900batch: iter_time=1.622, forward_time=0.152, loss_ctc=74.970, loss_att=58.171, acc=0.683, loss=63.211, backward_time=0.778, grad_norm=89.744, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.115, optim0_lr0=8.815e-05, train_time=5.717
+[gpua003:0/64] 2023-07-06 14:37:47,747 (trainer:732) INFO: 18epoch:train:901-1000batch: iter_time=9.918e-05, forward_time=0.108, loss_ctc=75.573, loss_att=63.432, acc=0.686, loss=67.074, backward_time=0.752, grad_norm=104.391, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.113, optim0_lr0=8.813e-05, train_time=2.007
+[gpua003:0/64] 2023-07-06 14:39:27,570 (trainer:732) INFO: 18epoch:train:1001-1100batch: iter_time=9.562e-05, forward_time=0.108, loss_ctc=71.935, loss_att=56.850, acc=0.696, loss=61.375, backward_time=0.751, grad_norm=93.037, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.113, optim0_lr0=8.810e-05, train_time=1.996
+[gpua003:0/64] 2023-07-06 14:41:07,498 (trainer:732) INFO: 18epoch:train:1101-1200batch: iter_time=1.016e-04, forward_time=0.109, loss_ctc=73.347, loss_att=52.973, acc=0.703, loss=59.086, backward_time=0.751, grad_norm=99.082, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.113, optim0_lr0=8.807e-05, train_time=1.998
+[gpua003:0/64] 2023-07-06 14:42:47,249 (trainer:732) INFO: 18epoch:train:1201-1300batch: iter_time=9.793e-05, forward_time=0.108, loss_ctc=83.048, loss_att=61.707, acc=0.686, loss=68.110, backward_time=0.750, grad_norm=102.821, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.113, optim0_lr0=8.804e-05, train_time=1.995
+[gpua003:0/64] 2023-07-06 14:44:27,022 (trainer:732) INFO: 18epoch:train:1301-1400batch: iter_time=1.028e-04, forward_time=0.108, loss_ctc=78.082, loss_att=59.145, acc=0.672, loss=64.826, backward_time=0.751, grad_norm=120.118, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.113, optim0_lr0=8.802e-05, train_time=1.995
+[gpua003:0/64] 2023-07-06 14:46:06,792 (trainer:732) INFO: 18epoch:train:1401-1500batch: iter_time=1.063e-04, forward_time=0.109, loss_ctc=77.767, loss_att=61.606, acc=0.674, loss=66.454, backward_time=0.752, grad_norm=122.959, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.113, optim0_lr0=8.799e-05, train_time=1.995
+[gpua003:0/64] 2023-07-06 14:47:46,697 (trainer:732) INFO: 18epoch:train:1501-1600batch: iter_time=9.485e-05, forward_time=0.110, loss_ctc=73.758, loss_att=56.709, acc=0.684, loss=61.824, backward_time=0.753, grad_norm=95.013, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.113, optim0_lr0=8.796e-05, train_time=1.998
+[gpua003:0/64] 2023-07-06 14:48:53,758 (multiple_iter_factory:32) INFO: Building 2th iter-factory...
+[gpua003:0/64] 2023-07-06 14:49:13,084 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua003:0/64] 2023-07-06 14:49:16,877 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"}
+ preprocess: )
+[gpua003:0/64] 2023-07-06 14:49:16,877 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5,
+[gpua003:0/64] 2023-07-06 14:49:16,883 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpua003:0/64] 2023-07-06 14:53:39,624 (trainer:732) INFO: 18epoch:train:1601-1700batch: iter_time=1.292, forward_time=0.109, loss_ctc=77.398, loss_att=58.944, acc=0.666, loss=64.480, backward_time=0.761, grad_norm=104.361, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.113, optim0_lr0=8.794e-05, train_time=7.058
+[gpua003:0/64] 2023-07-06 14:55:19,937 (trainer:732) INFO: 18epoch:train:1701-1800batch: iter_time=1.040e-04, forward_time=0.108, loss_ctc=72.292, loss_att=58.658, acc=0.692, loss=62.748, backward_time=0.754, grad_norm=90.979, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.113, optim0_lr0=8.791e-05, train_time=2.006
+[gpua003:0/64] 2023-07-06 14:57:00,068 (trainer:732) INFO: 18epoch:train:1801-1900batch: iter_time=9.482e-05, forward_time=0.108, loss_ctc=75.435, loss_att=59.110, acc=0.697, loss=64.008, backward_time=0.753, grad_norm=94.982, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.113, optim0_lr0=8.788e-05, train_time=2.002
+[gpua003:0/64] 2023-07-06 14:58:41,886 (trainer:732) INFO: 18epoch:train:1901-2000batch: iter_time=8.814e-05, forward_time=0.108, loss_ctc=66.463, loss_att=50.107, acc=0.714, loss=55.014, backward_time=0.753, grad_norm=94.405, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.113, optim0_lr0=8.785e-05, train_time=2.036
+[gpua003:0/64] 2023-07-06 15:00:21,511 (trainer:732) INFO: 18epoch:train:2001-2100batch: iter_time=8.940e-05, forward_time=0.108, loss_ctc=87.948, loss_att=64.112, acc=0.676, loss=71.263, backward_time=0.751, grad_norm=117.699, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.113, optim0_lr0=8.783e-05, train_time=1.992
+[gpua003:0/64] 2023-07-06 15:02:01,222 (trainer:732) INFO: 18epoch:train:2101-2200batch: iter_time=8.983e-05, forward_time=0.108, loss_ctc=78.447, loss_att=60.484, acc=0.681, loss=65.873, backward_time=0.752, grad_norm=100.916, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.113, optim0_lr0=8.780e-05, train_time=1.994
+[gpua003:0/64] 2023-07-06 15:03:48,764 (trainer:732) INFO: 18epoch:train:2201-2300batch: iter_time=9.119e-05, forward_time=0.107, loss_ctc=79.251, loss_att=62.785, acc=0.668, loss=67.725, backward_time=0.760, grad_norm=93.330, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.114, optim0_lr0=8.777e-05, train_time=2.151
+[gpua003:0/64] 2023-07-06 15:05:58,569 (trainer:732) INFO: 18epoch:train:2301-2400batch: iter_time=8.569e-05, forward_time=0.108, loss_ctc=66.068, loss_att=48.122, acc=0.690, loss=53.506, backward_time=0.810, grad_norm=98.868, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.113, optim0_lr0=8.775e-05, train_time=2.596
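
Throughout these blocks the logged loss is a weighted combination of loss_ctc and loss_att. The numbers are consistent with ESPnet's hybrid CTC/attention objective with ctc_weight = 0.3; that weight is inferred from the log, not read from the config, so treat this sketch as a consistency check rather than the training code:

def combined_loss(loss_ctc: float, loss_att: float, ctc_weight: float = 0.3) -> float:
    # loss = w * loss_ctc + (1 - w) * loss_att, the usual hybrid CTC/attention form.
    return ctc_weight * loss_ctc + (1.0 - ctc_weight) * loss_att

# 18epoch:train:2301-2400batch above: loss_ctc=66.068, loss_att=48.122, loss=53.506
assert round(combined_loss(66.068, 48.122), 3) == 53.506
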
+[gpua003:0/64] 2023-07-06 15:08:21,871 (multiple_iter_factory:32) INFO: Building 3th iter-factory...
+[gpua003:0/64] 2023-07-06 15:08:41,122 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua003:0/64] 2023-07-06 15:08:44,919 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"}
+ preprocess: )
+[gpua003:0/64] 2023-07-06 15:08:44,919 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3,
+[gpua003:0/64] 2023-07-06 15:08:44,925 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpua003:0/64] 2023-07-06 15:11:51,642 (trainer:732) INFO: 18epoch:train:2401-2500batch: iter_time=1.318, forward_time=0.127, loss_ctc=84.958, loss_att=62.041, acc=0.667, loss=68.916, backward_time=0.850, grad_norm=104.647, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.114, optim0_lr0=8.772e-05, train_time=7.061
+[gpua003:0/64] 2023-07-06 15:13:33,609 (trainer:732) INFO: 18epoch:train:2501-2600batch: iter_time=1.008e-04, forward_time=0.108, loss_ctc=75.155, loss_att=62.287, acc=0.691, loss=66.148, backward_time=0.764, grad_norm=96.943, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.113, optim0_lr0=8.769e-05, train_time=2.039
+[gpua003:0/64] 2023-07-06 15:15:13,429 (trainer:732) INFO: 18epoch:train:2601-2700batch: iter_time=1.057e-04, forward_time=0.108, loss_ctc=76.932, loss_att=63.507, acc=0.694, loss=67.535, backward_time=0.750, grad_norm=96.917, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.113, optim0_lr0=8.766e-05, train_time=1.996
+[gpua003:0/64] 2023-07-06 15:16:53,331 (trainer:732) INFO: 18epoch:train:2701-2800batch: iter_time=1.094e-04, forward_time=0.108, loss_ctc=62.411, loss_att=46.895, acc=0.715, loss=51.550, backward_time=0.752, grad_norm=92.496, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.113, optim0_lr0=8.764e-05, train_time=1.998
+[gpua003:0/64] 2023-07-06 15:18:33,060 (trainer:732) INFO: 18epoch:train:2801-2900batch: iter_time=9.940e-05, forward_time=0.108, loss_ctc=81.354, loss_att=57.151, acc=0.696, loss=64.412, backward_time=0.751, grad_norm=109.033, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.113, optim0_lr0=8.761e-05, train_time=1.994
+[gpua003:0/64] 2023-07-06 15:20:12,823 (trainer:732) INFO: 18epoch:train:2901-3000batch: iter_time=1.129e-04, forward_time=0.108, loss_ctc=73.741, loss_att=58.586, acc=0.679, loss=63.132, backward_time=0.752, grad_norm=91.478, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.113, optim0_lr0=8.758e-05, train_time=1.995
+[gpua003:0/64] 2023-07-06 15:21:52,679 (trainer:732) INFO: 18epoch:train:3001-3100batch: iter_time=1.015e-04, forward_time=0.109, loss_ctc=83.727, loss_att=65.874, acc=0.669, loss=71.230, backward_time=0.752, grad_norm=106.440, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.113, optim0_lr0=8.756e-05, train_time=1.997
+[gpua003:0/64] 2023-07-06 15:23:32,451 (trainer:732) INFO: 18epoch:train:3101-3200batch: iter_time=1.030e-04, forward_time=0.109, loss_ctc=67.147, loss_att=49.955, acc=0.689, loss=55.113, backward_time=0.751, grad_norm=107.665, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.113, optim0_lr0=8.753e-05, train_time=1.995
+[gpua003:0/64] 2023-07-06 15:25:14,937 (trainer:732) INFO: 18epoch:train:3201-3300batch: iter_time=9.253e-05, forward_time=0.108, loss_ctc=81.896, loss_att=61.574, acc=0.673, loss=67.671, backward_time=0.753, grad_norm=105.660, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.113, optim0_lr0=8.750e-05, train_time=2.049
+[gpua003:0/64] 2023-07-06 15:25:48,262 (multiple_iter_factory:32) INFO: Building 4th iter-factory...
+[gpua003:0/64] 2023-07-06 15:26:07,548 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua003:0/64] 2023-07-06 15:26:11,328 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"}
+ preprocess: )
+[gpua003:0/64] 2023-07-06 15:26:11,328 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4,
+[gpua003:0/64] 2023-07-06 15:26:11,334 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpua003:0/64] 2023-07-06 15:29:42,037 (trainer:732) INFO: 18epoch:train:3301-3400batch: iter_time=1.323, forward_time=0.110, loss_ctc=72.978, loss_att=56.716, acc=0.686, loss=61.595, backward_time=0.775, grad_norm=91.640, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.114, optim0_lr0=8.748e-05, train_time=5.342
+[gpua003:0/64] 2023-07-06 15:31:22,262 (trainer:732) INFO: 18epoch:train:3401-3500batch: iter_time=1.067e-04, forward_time=0.108, loss_ctc=72.957, loss_att=62.528, acc=0.685, loss=65.657, backward_time=0.753, grad_norm=88.960, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.113, optim0_lr0=8.745e-05, train_time=2.004
+[gpua003:0/64] 2023-07-06 15:33:02,184 (trainer:732) INFO: 18epoch:train:3501-3600batch: iter_time=1.066e-04, forward_time=0.109, loss_ctc=67.727, loss_att=54.922, acc=0.697, loss=58.763, backward_time=0.753, grad_norm=96.147, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.113, optim0_lr0=8.742e-05, train_time=1.998
+[gpua003:0/64] 2023-07-06 15:34:41,830 (trainer:732) INFO: 18epoch:train:3601-3700batch: iter_time=1.085e-04, forward_time=0.108, loss_ctc=72.865, loss_att=53.783, acc=0.699, loss=59.507, backward_time=0.752, grad_norm=105.810, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.113, optim0_lr0=8.740e-05, train_time=1.993
+[gpua003:0/64] 2023-07-06 15:36:21,533 (trainer:732) INFO: 18epoch:train:3701-3800batch: iter_time=1.178e-04, forward_time=0.109, loss_ctc=78.415, loss_att=59.688, acc=0.681, loss=65.306, backward_time=0.752, grad_norm=94.163, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.113, optim0_lr0=8.737e-05, train_time=1.994
+[gpua003:0/64] 2023-07-06 15:38:01,291 (trainer:732) INFO: 18epoch:train:3801-3900batch: iter_time=1.163e-04, forward_time=0.109, loss_ctc=74.930, loss_att=57.824, acc=0.677, loss=62.956, backward_time=0.752, grad_norm=98.833, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.114, optim0_lr0=8.734e-05, train_time=1.995
+[gpua003:0/64] 2023-07-06 15:39:41,058 (trainer:732) INFO: 18epoch:train:3901-4000batch: iter_time=1.116e-04, forward_time=0.109, loss_ctc=73.796, loss_att=59.813, acc=0.674, loss=64.008, backward_time=0.753, grad_norm=105.954, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.113, optim0_lr0=8.732e-05, train_time=1.995
+[gpua003:0/64] 2023-07-06 15:41:20,935 (trainer:732) INFO: 18epoch:train:4001-4100batch: iter_time=9.829e-05, forward_time=0.109, loss_ctc=71.016, loss_att=54.671, acc=0.686, loss=59.574, backward_time=0.753, grad_norm=88.325, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.113, optim0_lr0=8.729e-05, train_time=1.997
+[gpua003:0/64] 2023-07-06 15:42:27,084 (multiple_iter_factory:32) INFO: Building 5th iter-factory...
+[gpua003:0/64] 2023-07-06 15:42:46,118 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua003:0/64] 2023-07-06 15:42:49,698 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"}
+ preprocess: )
+[gpua003:0/64] 2023-07-06 15:42:49,698 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11,
+[gpua003:0/64] 2023-07-06 15:42:49,704 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpua003:0/64] 2023-07-06 15:46:38,959 (trainer:732) INFO: 18epoch:train:4101-4200batch: iter_time=1.311, forward_time=0.109, loss_ctc=82.633, loss_att=57.106, acc=0.674, loss=64.764, backward_time=0.765, grad_norm=108.721, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.113, optim0_lr0=8.726e-05, train_time=6.360
+[gpua003:0/64] 2023-07-06 15:48:19,686 (trainer:732) INFO: 18epoch:train:4201-4300batch: iter_time=9.726e-05, forward_time=0.109, loss_ctc=74.521, loss_att=62.322, acc=0.689, loss=65.982, backward_time=0.757, grad_norm=96.759, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.113, optim0_lr0=8.724e-05, train_time=2.014
+[gpua003:0/64] 2023-07-06 15:49:59,357 (trainer:732) INFO: 18epoch:train:4301-4400batch: iter_time=1.081e-04, forward_time=0.108, loss_ctc=68.417, loss_att=54.908, acc=0.700, loss=58.961, backward_time=0.751, grad_norm=87.929, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.113, optim0_lr0=8.721e-05, train_time=1.993
+[gpua003:0/64] 2023-07-06 15:51:39,010 (trainer:732) INFO: 18epoch:train:4401-4500batch: iter_time=1.019e-04, forward_time=0.108, loss_ctc=72.963, loss_att=54.105, acc=0.704, loss=59.763, backward_time=0.750, grad_norm=121.911, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.113, optim0_lr0=8.718e-05, train_time=1.993
+[gpua003:0/64] 2023-07-06 15:53:18,767 (trainer:732) INFO: 18epoch:train:4501-4600batch: iter_time=1.001e-04, forward_time=0.108, loss_ctc=78.466, loss_att=59.773, acc=0.690, loss=65.381, backward_time=0.751, grad_norm=96.401, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.113, optim0_lr0=8.716e-05, train_time=1.995
+[gpua003:0/64] 2023-07-06 15:54:58,498 (trainer:732) INFO: 18epoch:train:4601-4700batch: iter_time=1.004e-04, forward_time=0.108, loss_ctc=76.432, loss_att=58.796, acc=0.677, loss=64.087, backward_time=0.752, grad_norm=97.092, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.113, optim0_lr0=8.713e-05, train_time=1.994
+[gpua003:0/64] 2023-07-06 15:56:38,047 (trainer:732) INFO: 18epoch:train:4701-4800batch: iter_time=9.499e-05, forward_time=0.108, loss_ctc=79.331, loss_att=63.852, acc=0.665, loss=68.495, backward_time=0.750, grad_norm=256.176, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.113, optim0_lr0=8.710e-05, train_time=1.991
+[gpua003:0/64] 2023-07-06 15:58:19,964 (trainer:732) INFO: 18epoch:train:4801-4900batch: iter_time=9.716e-05, forward_time=0.108, loss_ctc=68.325, loss_att=52.879, acc=0.682, loss=57.513, backward_time=0.751, grad_norm=118.723, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.113, optim0_lr0=8.708e-05, train_time=2.038
+[gpua003:0/64] 2023-07-06 16:00:00,347 (multiple_iter_factory:32) INFO: Building 6th iter-factory...
+[gpua003:0/64] 2023-07-06 16:00:19,563 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua003:0/64] 2023-07-06 16:00:23,071 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"}
+ preprocess: )
+[gpua003:0/64] 2023-07-06 16:00:23,071 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0,
+[gpua003:0/64] 2023-07-06 16:00:23,077 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpua003:0/64] 2023-07-06 16:03:54,812 (trainer:732) INFO: 18epoch:train:4901-5000batch: iter_time=1.281, forward_time=0.108, loss_ctc=83.769, loss_att=60.724, acc=0.667, loss=67.638, backward_time=0.761, grad_norm=148.559, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.112, optim0_lr0=8.705e-05, train_time=6.697
+[gpua003:0/64] 2023-07-06 16:05:37,501 (trainer:732) INFO: 18epoch:train:5001-5100batch: iter_time=9.821e-05, forward_time=0.108, loss_ctc=74.654, loss_att=62.672, acc=0.686, loss=66.267, backward_time=0.760, grad_norm=88.103, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.113, optim0_lr0=8.702e-05, train_time=2.054
+[gpua003:0/64] 2023-07-06 16:07:17,638 (trainer:732) INFO: 18epoch:train:5101-5200batch: iter_time=1.032e-04, forward_time=0.108, loss_ctc=74.717, loss_att=61.342, acc=0.690, loss=65.354, backward_time=0.751, grad_norm=111.773, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.112, optim0_lr0=8.700e-05, train_time=2.003
+[gpua003:0/64] 2023-07-06 16:08:57,432 (trainer:732) INFO: 18epoch:train:5201-5300batch: iter_time=1.039e-04, forward_time=0.109, loss_ctc=62.277, loss_att=47.038, acc=0.711, loss=51.609, backward_time=0.752, grad_norm=77.470, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.113, optim0_lr0=8.697e-05, train_time=1.996
+[gpua003:0/64] 2023-07-06 16:10:37,101 (trainer:732) INFO: 18epoch:train:5301-5400batch: iter_time=1.087e-04, forward_time=0.108, loss_ctc=81.357, loss_att=56.958, acc=0.687, loss=64.278, backward_time=0.751, grad_norm=148.292, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.113, optim0_lr0=8.695e-05, train_time=1.993
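
The loss_scale column is the AMP dynamic loss scale, and its logged values are exact powers of two (3.603e+16 = 2**55, 7.206e+16 = 2**56, 1.441e+17 = 2**57, the jump visible in the 4001-4100 block above): the scaler doubles the scale after a run of overflow-free steps and halves it when scaled gradients overflow. A minimal sketch with PyTorch's GradScaler; the hyperparameters shown are assumed defaults, not values read from this run's config:

import torch

model = torch.nn.Linear(8, 8).cuda()  # assumes a CUDA device, as on these A100 nodes
optimizer = torch.optim.Adam(model.parameters())
scaler = torch.cuda.amp.GradScaler(growth_factor=2.0, backoff_factor=0.5, growth_interval=2000)

x = torch.randn(4, 8, device="cuda")
with torch.cuda.amp.autocast():
    loss = model(x).square().mean()
scaler.scale(loss).backward()  # backward runs on the scaled loss
scaler.step(optimizer)         # unscales first; skips the step on inf/nan gradients
scaler.update()                # doubles the scale after growth_interval clean steps
print(scaler.get_scale())
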
+[gpua003:0/64] 2023-07-06 16:12:16,859 (trainer:732) INFO: 18epoch:train:5401-5500batch: iter_time=1.099e-04, forward_time=0.109, loss_ctc=73.380, loss_att=57.785, acc=0.682, loss=62.463, backward_time=0.751, grad_norm=91.302, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.113, optim0_lr0=8.692e-05, train_time=1.995
+[gpua003:0/64] 2023-07-06 16:13:56,539 (trainer:732) INFO: 18epoch:train:5501-5600batch: iter_time=1.039e-04, forward_time=0.108, loss_ctc=81.378, loss_att=65.579, acc=0.665, loss=70.319, backward_time=0.751, grad_norm=99.738, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.113, optim0_lr0=8.689e-05, train_time=1.993
+[gpua003:0/64] 2023-07-06 16:15:35,974 (trainer:732) INFO: 18epoch:train:5601-5700batch: iter_time=1.127e-04, forward_time=0.108, loss_ctc=65.790, loss_att=48.674, acc=0.695, loss=53.808, backward_time=0.749, grad_norm=111.144, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.113, optim0_lr0=8.687e-05, train_time=1.988
+[gpua003:0/64] 2023-07-06 16:17:15,561 (trainer:732) INFO: 18epoch:train:5701-5800batch: iter_time=1.069e-04, forward_time=0.107, loss_ctc=82.875, loss_att=60.270, acc=0.669, loss=67.051, backward_time=0.750, grad_norm=114.126, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.113, optim0_lr0=8.684e-05, train_time=1.992
+[gpua003:0/64] 2023-07-06 16:17:48,761 (multiple_iter_factory:32) INFO: Building 7th iter-factory...
+[gpua003:0/64] 2023-07-06 16:18:08,112 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua003:0/64] 2023-07-06 16:18:11,617 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"}
+ preprocess: )
+[gpua003:0/64] 2023-07-06 16:18:11,617 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10,
+[gpua003:0/64] 2023-07-06 16:18:11,623 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpua003:0/64] 2023-07-06 16:22:38,235 (trainer:732) INFO: 18epoch:train:5801-5900batch: iter_time=1.313, forward_time=0.109, loss_ctc=73.660, loss_att=59.347, acc=0.685, loss=63.641, backward_time=0.764, grad_norm=132.057, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.113, optim0_lr0=8.681e-05, train_time=6.453
+[gpua003:0/64] 2023-07-06 16:24:18,781 (trainer:732) INFO: 18epoch:train:5901-6000batch: iter_time=1.002e-04, forward_time=0.109, loss_ctc=76.119, loss_att=58.539, acc=0.690, loss=63.813, backward_time=0.754, grad_norm=88.467, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.113, optim0_lr0=8.679e-05, train_time=2.011
+[gpua003:0/64] 2023-07-06 16:26:06,138 (trainer:732) INFO: 18epoch:train:6001-6100batch: iter_time=9.305e-05, forward_time=0.109, loss_ctc=67.971, loss_att=52.451, acc=0.704, loss=57.107, backward_time=0.766, grad_norm=95.395, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.113, optim0_lr0=8.676e-05, train_time=2.147
+[gpua003:0/64] 2023-07-06 16:27:49,192 (trainer:732) INFO: 18epoch:train:6101-6200batch: iter_time=9.965e-05, forward_time=0.108, loss_ctc=77.390, loss_att=55.435, acc=0.689, loss=62.021, backward_time=0.756, grad_norm=106.144, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.113, optim0_lr0=8.674e-05, train_time=2.061
+[gpua003:0/64] 2023-07-06 16:29:29,408 (trainer:732) INFO: 18epoch:train:6201-6300batch: iter_time=9.552e-05, forward_time=0.109, loss_ctc=70.826, loss_att=56.281, acc=0.684, loss=60.644, backward_time=0.753, grad_norm=94.037, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.113, optim0_lr0=8.671e-05, train_time=2.004
+[gpua003:0/64] 2023-07-06 16:31:09,234 (trainer:732) INFO: 18epoch:train:6301-6400batch: iter_time=9.346e-05, forward_time=0.107, loss_ctc=78.001, loss_att=63.797, acc=0.672, loss=68.058, backward_time=0.751, grad_norm=98.998, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.113, optim0_lr0=8.668e-05, train_time=1.996
+[gpua003:0/64] 2023-07-06 16:32:48,924 (trainer:732) INFO: 18epoch:train:6401-6500batch: iter_time=9.280e-05, forward_time=0.108, loss_ctc=72.916, loss_att=53.940, acc=0.675, loss=59.633, backward_time=0.751, grad_norm=86.057, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.113, optim0_lr0=8.666e-05, train_time=1.994
+[gpua003:0/64] 2023-07-06 16:34:29,082 (trainer:732) INFO: 18epoch:train:6501-6600batch: iter_time=1.069e-04, forward_time=0.107, loss_ctc=74.326, loss_att=55.265, acc=0.677, loss=60.983, backward_time=0.752, grad_norm=94.716, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.113, optim0_lr0=8.663e-05, train_time=2.003
+[gpua003:0/64] 2023-07-06 16:35:38,397 (multiple_iter_factory:32) INFO: Building 8th iter-factory...
+[gpua003:0/64] 2023-07-06 16:35:57,667 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua003:0/64] 2023-07-06 16:36:01,099 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"}
+ preprocess: )
+[gpua003:0/64] 2023-07-06 16:36:01,100 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8,
+[gpua003:0/64] 2023-07-06 16:36:01,106 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpua003:0/64] 2023-07-06 16:41:32,144 (trainer:732) INFO: 18epoch:train:6601-6700batch: iter_time=1.282, forward_time=0.107, loss_ctc=83.492, loss_att=58.963, acc=0.681, loss=66.321, backward_time=0.770, grad_norm=107.633, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.113, optim0_lr0=8.661e-05, train_time=8.461
+[gpua003:0/64] 2023-07-06 16:43:17,374 (trainer:732) INFO: 18epoch:train:6701-6800batch: iter_time=9.000e-05, forward_time=0.107, loss_ctc=72.670, loss_att=61.072, acc=0.690, loss=64.551, backward_time=0.767, grad_norm=93.284, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.113, optim0_lr0=8.658e-05, train_time=2.104
+[gpua003:0/64] 2023-07-06 16:44:57,475 (trainer:732) INFO: 18epoch:train:6801-6900batch: iter_time=8.929e-05, forward_time=0.107, loss_ctc=68.519, loss_att=54.104, acc=0.697, loss=58.428, backward_time=0.751, grad_norm=89.012, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.113, optim0_lr0=8.655e-05, train_time=2.002
+[gpua003:0/64] 2023-07-06 16:46:44,014 (trainer:732) INFO: 18epoch:train:6901-7000batch: iter_time=8.633e-05, forward_time=0.107, loss_ctc=73.189, loss_att=52.360, acc=0.706, loss=58.609, backward_time=0.755, grad_norm=131.163, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.112, optim0_lr0=8.653e-05, train_time=2.131
+[gpua003:0/64] 2023-07-06 16:48:38,048 (trainer:732) INFO: 18epoch:train:7001-7100batch: iter_time=9.301e-05, forward_time=0.107, loss_ctc=76.911, loss_att=60.205, acc=0.681, loss=65.217, backward_time=0.775, grad_norm=91.195, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.113, optim0_lr0=8.650e-05, train_time=2.280
+[gpua003:0/64] 2023-07-06 16:50:17,837 (trainer:732) INFO: 18epoch:train:7101-7200batch: iter_time=9.519e-05, forward_time=0.107, loss_ctc=73.833, loss_att=56.759, acc=0.685, loss=61.881, backward_time=0.750, grad_norm=97.936, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.113, optim0_lr0=8.648e-05, train_time=1.996
+[gpua003:0/64] 2023-07-06 16:51:58,408 (trainer:732) INFO: 18epoch:train:7201-7300batch: iter_time=9.376e-05, forward_time=0.107, loss_ctc=76.637, loss_att=62.319, acc=0.667, loss=66.614, backward_time=0.752, grad_norm=113.058, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.113, optim0_lr0=8.645e-05, train_time=2.011
+[gpua003:0/64] 2023-07-06 16:54:05,563 (trainer:732) INFO: 18epoch:train:7301-7400batch: iter_time=1.831e-04, forward_time=0.124, loss_ctc=68.742, loss_att=52.246, acc=0.684, loss=57.195, backward_time=0.816, grad_norm=109.431, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.119, optim0_lr0=8.642e-05, train_time=2.543
+[gpua003:0/64] 2023-07-06 16:55:47,933 (trainer:732) INFO: 18epoch:train:7401-7500batch: iter_time=9.365e-05, forward_time=0.110, loss_ctc=82.184, loss_att=57.189, acc=0.675, loss=64.687, backward_time=0.760, grad_norm=142.708, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.113, optim0_lr0=8.640e-05, train_time=2.047
+[gpua003:0/64] 2023-07-06 16:55:54,106 (multiple_iter_factory:32) INFO: Building 9th iter-factory...
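
grad_norm in these records is the global gradient norm and clip=100.000 appears to be the configured max-norm threshold, so a block like 7401-7500 above (grad_norm=142.708) had its gradients rescaled by roughly 100/142.7 before the optimizer step. The standard PyTorch operation behind this, with the threshold taken from the log and everything else illustrative:

import torch

model = torch.nn.Linear(16, 16)
loss = model(torch.randn(2, 16)).sum()
loss.backward()
# Returns the total norm computed over all parameter gradients (what the log
# reports as grad_norm) and rescales them in place only if it exceeds max_norm.
total_norm = torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=100.0)
print(float(total_norm))
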
+[gpua003:0/64] 2023-07-06 16:56:13,402 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua003:0/64] 2023-07-06 16:56:16,909 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"}
+ preprocess: )
+[gpua003:0/64] 2023-07-06 16:56:16,910 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2,
+[gpua003:0/64] 2023-07-06 16:56:16,916 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpua003:0/64] 2023-07-06 17:00:34,079 (trainer:732) INFO: 18epoch:train:7501-7600batch: iter_time=1.505, forward_time=0.108, loss_ctc=74.300, loss_att=63.152, acc=0.683, loss=66.497, backward_time=0.765, grad_norm=141.163, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.113, optim0_lr0=8.637e-05, train_time=5.723
+[gpua003:0/64] 2023-07-06 17:02:15,733 (trainer:732) INFO: 18epoch:train:7601-7700batch: iter_time=9.276e-05, forward_time=0.109, loss_ctc=75.341, loss_att=59.650, acc=0.691, loss=64.357, backward_time=0.757, grad_norm=131.218, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.113, optim0_lr0=8.635e-05, train_time=2.033
+[gpua003:0/64] 2023-07-06 17:04:00,532 (trainer:732) INFO: 18epoch:train:7701-7800batch: iter_time=9.513e-05, forward_time=0.108, loss_ctc=62.710, loss_att=47.592, acc=0.714, loss=52.128, backward_time=0.758, grad_norm=100.496, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.113, optim0_lr0=8.632e-05, train_time=2.096
+[gpua003:0/64] 2023-07-06 17:05:41,261 (trainer:732) INFO: 18epoch:train:7801-7900batch: iter_time=1.024e-04, forward_time=0.109, loss_ctc=80.778, loss_att=56.500, acc=0.691, loss=63.783, backward_time=0.753, grad_norm=159.514, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.113, optim0_lr0=8.630e-05, train_time=2.014
+[gpua003:0/64] 2023-07-06 17:07:23,370 (trainer:732) INFO: 18epoch:train:7901-8000batch: iter_time=9.871e-05, forward_time=0.109, loss_ctc=72.426, loss_att=57.186, acc=0.677, loss=61.758, backward_time=0.755, grad_norm=100.381, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.113, optim0_lr0=8.627e-05, train_time=2.042
+[gpua003:0/64] 2023-07-06 17:09:03,079 (trainer:732) INFO: 18epoch:train:8001-8100batch: iter_time=9.808e-05, forward_time=0.108, loss_ctc=81.683, loss_att=65.407, acc=0.673, loss=70.289, backward_time=0.751, grad_norm=103.619, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.113, optim0_lr0=8.624e-05, train_time=1.994
+[gpua003:0/64] 2023-07-06 17:10:54,113 (trainer:732) INFO: 18epoch:train:8101-8200batch: iter_time=1.053e-04, forward_time=0.108, loss_ctc=64.899, loss_att=47.680, acc=0.696, loss=52.846, backward_time=0.765, grad_norm=84.417, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.113, optim0_lr0=8.622e-05, train_time=2.220
+[gpua003:0/64] 2023-07-06 17:12:33,937 (trainer:732) INFO: 18epoch:train:8201-8300batch: iter_time=9.519e-05, forward_time=0.107, loss_ctc=80.088, loss_att=59.873, acc=0.669, loss=65.937, backward_time=0.751, grad_norm=140.365, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.112, optim0_lr0=8.619e-05, train_time=1.996
+[gpua003:0/64] 2023-07-06 17:13:08,280 (multiple_iter_factory:32) INFO: Building 10th iter-factory...
+[gpua003:0/64] 2023-07-06 17:13:27,649 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua003:0/64] 2023-07-06 17:13:31,147 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"}
+ preprocess: )
+[gpua003:0/64] 2023-07-06 17:13:31,147 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6,
+[gpua003:0/64] 2023-07-06 17:13:31,153 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpua003:0/64] 2023-07-06 17:18:59,187 (trainer:732) INFO: 18epoch:train:8301-8400batch: iter_time=1.294, forward_time=0.107, loss_ctc=72.616, loss_att=56.151, acc=0.688, loss=61.090, backward_time=0.763, grad_norm=159.886, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.112, optim0_lr0=8.617e-05, train_time=7.705
+[gpua003:0/64] 2023-07-06 17:20:40,408 (trainer:732) INFO: 18epoch:train:8401-8500batch: iter_time=1.002e-04, forward_time=0.108, loss_ctc=72.837, loss_att=60.069, acc=0.694, loss=63.899, backward_time=0.752, grad_norm=118.934, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.112, optim0_lr0=8.614e-05, train_time=2.024
+[gpua003:0/64] 2023-07-06 17:22:20,274 (trainer:732) INFO: 18epoch:train:8501-8600batch: iter_time=9.768e-05, forward_time=0.108, loss_ctc=68.421, loss_att=53.892, acc=0.698, loss=58.250, backward_time=0.752, grad_norm=96.100, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.113, optim0_lr0=8.612e-05, train_time=1.997
+[gpua003:0/64] 2023-07-06 17:24:14,320 (trainer:732) INFO: 18epoch:train:8601-8700batch: iter_time=1.036e-04, forward_time=0.109, loss_ctc=68.857, loss_att=51.084, acc=0.707, loss=56.416, backward_time=0.768, grad_norm=78.940, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.113, optim0_lr0=8.609e-05, train_time=2.281
+[gpua003:0/64] 2023-07-06 17:25:54,219 (trainer:732) INFO: 18epoch:train:8701-8800batch: iter_time=1.021e-04, forward_time=0.108, loss_ctc=79.583, loss_att=60.458, acc=0.683, loss=66.195, backward_time=0.752, grad_norm=99.339, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.113, optim0_lr0=8.607e-05, train_time=1.998
+[gpua003:0/64] 2023-07-06 17:27:34,084 (trainer:732) INFO: 18epoch:train:8801-8900batch: iter_time=9.920e-05, forward_time=0.108, loss_ctc=74.773, loss_att=57.128, acc=0.683, loss=62.421, backward_time=0.753, grad_norm=91.643, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.113, optim0_lr0=8.604e-05, train_time=1.997
+[gpua003:0/64] 2023-07-06 17:29:13,700 (trainer:732) INFO: 18epoch:train:8901-9000batch: iter_time=1.083e-04, forward_time=0.107, loss_ctc=74.198, loss_att=58.596, acc=0.679, loss=63.277, backward_time=0.750, grad_norm=131.138, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.112, optim0_lr0=8.601e-05, train_time=1.992
+[gpua003:0/64] 2023-07-06 17:30:53,462 (trainer:732) INFO: 18epoch:train:9001-9100batch: iter_time=1.043e-04, forward_time=0.107, loss_ctc=72.289, loss_att=54.298, acc=0.688, loss=59.695, backward_time=0.750, grad_norm=98.942, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.112, optim0_lr0=8.599e-05, train_time=1.995
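
optim0_lr0 decays slowly from 8.885e-05 toward 8.5e-05 across these blocks, which matches the inverse-square-root phase of the warmup schedule named in the experiment directory (lr2.5e-4, warmup10k). ESPnet's WarmupLR rule is sketched below; the exact optimizer-step count behind each logged value is not recoverable from the log (gradient accumulation changes the step/batch ratio), so this is qualitative only:

def warmup_lr(step: int, base_lr: float = 2.5e-4, warmup_steps: int = 10_000) -> float:
    # Noam-style schedule: linear warmup for warmup_steps, then step**-0.5 decay.
    return base_lr * warmup_steps**0.5 * min(step**-0.5, step * warmup_steps**-1.5)

# Deep into the decay phase the curve is locally flat, e.g. ~0.3% over 500 steps:
print(warmup_lr(80_000), warmup_lr(80_500))  # 8.839e-05 8.811e-05
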
+[gpua003:0/64] 2023-07-06 17:32:00,571 (multiple_iter_factory:32) INFO: Building 11th iter-factory...
+[gpua003:0/64] 2023-07-06 17:32:19,636 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua003:0/64] 2023-07-06 17:32:23,155 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"}
+ preprocess: )
+[gpua003:0/64] 2023-07-06 17:32:23,155 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9,
+[gpua003:0/64] 2023-07-06 17:32:23,161 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpua003:0/64] 2023-07-06 17:36:57,445 (trainer:732) INFO: 18epoch:train:9101-9200batch: iter_time=1.310, forward_time=0.109, loss_ctc=81.340, loss_att=56.298, acc=0.680, loss=63.811, backward_time=0.761, grad_norm=111.662, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.113, optim0_lr0=8.596e-05, train_time=7.279
+[gpua003:0/64] 2023-07-06 17:38:39,715 (trainer:732) INFO: 18epoch:train:9201-9300batch: iter_time=1.090e-04, forward_time=0.108, loss_ctc=73.285, loss_att=63.161, acc=0.686, loss=66.198, backward_time=0.757, grad_norm=88.279, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.113, optim0_lr0=8.594e-05, train_time=2.045
+[gpua003:0/64] 2023-07-06 17:40:23,113 (trainer:732) INFO: 18epoch:train:9301-9400batch: iter_time=1.137e-04, forward_time=0.109, loss_ctc=68.800, loss_att=55.601, acc=0.706, loss=59.561, backward_time=0.756, grad_norm=92.752, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.113, optim0_lr0=8.591e-05, train_time=2.068
+[gpua003:0/64] 2023-07-06 17:42:05,313 (trainer:732) INFO: 18epoch:train:9401-9500batch: iter_time=1.016e-04, forward_time=0.108, loss_ctc=72.284, loss_att=52.694, acc=0.710, loss=58.571, backward_time=0.754, grad_norm=98.040, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.113, optim0_lr0=8.589e-05, train_time=2.044
+[gpua003:0/64] 2023-07-06 17:43:45,031 (trainer:732) INFO: 18epoch:train:9501-9600batch: iter_time=1.102e-04, forward_time=0.109, loss_ctc=76.407, loss_att=59.152, acc=0.693, loss=64.328, backward_time=0.751, grad_norm=111.122, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.113, optim0_lr0=8.586e-05, train_time=1.994
+[gpua003:0/64] 2023-07-06 17:45:24,576 (trainer:732) INFO: 18epoch:train:9601-9700batch: iter_time=1.125e-04, forward_time=0.107, loss_ctc=74.718, loss_att=58.743, acc=0.682, loss=63.535, backward_time=0.750, grad_norm=102.896, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.113, optim0_lr0=8.584e-05, train_time=1.991
+[gpua003:0/64] 2023-07-06 17:47:08,153 (trainer:732) INFO: 18epoch:train:9701-9800batch: iter_time=1.040e-04, forward_time=0.107, loss_ctc=76.130, loss_att=62.394, acc=0.672, loss=66.515, backward_time=0.752, grad_norm=99.445, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.113, optim0_lr0=8.581e-05, train_time=2.071
+[gpua003:0/64] 2023-07-06 17:48:48,960 (trainer:732) INFO: 18epoch:train:9801-9900batch: iter_time=9.027e-05, forward_time=0.108, loss_ctc=67.693, loss_att=51.247, acc=0.689, loss=56.181, backward_time=0.758, grad_norm=101.173, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.113, optim0_lr0=8.579e-05, train_time=2.016
+[gpua003:0/64] 2023-07-06 17:50:28,839 (trainer:732) INFO: 18epoch:train:9901-10000batch: iter_time=8.743e-05, forward_time=0.109, loss_ctc=81.308, loss_att=58.352, acc=0.679, loss=65.239, backward_time=0.752, grad_norm=97.815, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.113, optim0_lr0=8.576e-05, train_time=1.997
+[gpua003:0/64] 2023-07-06 18:02:52,400 (trainer:338) INFO: 18epoch results: [train] iter_time=0.198, forward_time=0.110, loss_ctc=75.020, loss_att=57.840, acc=0.686, loss=62.994, backward_time=0.757, grad_norm=106.540, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.113, optim0_lr0=8.705e-05, train_time=2.652, time=3 hours, 41 minutes and 15.98 seconds, total_count=150000, gpu_max_cached_mem_GB=37.779, [valid] loss_ctc=50.528, cer_ctc=0.288, loss_att=42.295, acc=0.657, cer=0.376, wer=0.987, loss=44.765, time=5 minutes and 47.09 seconds, total_count=15686, gpu_max_cached_mem_GB=37.779, [att_plot] time=6 minutes and 21.52 seconds, total_count=0, gpu_max_cached_mem_GB=37.779
+[gpua003:0/64] 2023-07-06 18:03:10,995 (trainer:386) INFO: The best model has been updated: valid.total_count
+[gpua003:0/64] 2023-07-06 18:03:11,099 (trainer:440) INFO: The model files were removed: exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/13epoch.pth
+[gpua003:0/64] 2023-07-06 18:03:11,142 (trainer:272) INFO: 19/100epoch started. Estimated time to finish: 1 week, 6 days and 5 hours
+[gpua003:0/64] 2023-07-06 18:03:12,483 (multiple_iter_factory:32) INFO: Building 0th iter-factory...
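
The "Estimated time to finish" line can be sanity-checked from the epoch-18 results above: one epoch took about 3 h 41 m of training plus roughly 6 m of validation and 6 m of attention plotting, and 82 of 100 epochs remain. Assuming the trainer simply extrapolates mean epoch wall time (the actual averaging ESPnet uses may differ), the arithmetic lands close to the logged 1 week, 6 days and 5 hours:

from datetime import timedelta

# Epoch-18 wall times from the results line above (rounded to whole seconds).
train = timedelta(hours=3, minutes=41, seconds=16)
valid = timedelta(minutes=5, seconds=47)
att_plot = timedelta(minutes=6, seconds=22)

remaining_epochs = 100 - 18
eta = remaining_epochs * (train + valid + att_plot)
print(eta)  # 13 days, 7:00:10 -- i.e. about 1 week and 6 days, matching the log
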
+[gpua003:0/64] 2023-07-06 18:03:31,414 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua003:0/64] 2023-07-06 18:03:36,058 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"}
+ preprocess: )
+[gpua003:0/64] 2023-07-06 18:03:36,058 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11,
+[gpua003:0/64] 2023-07-06 18:03:36,157 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpua003:0/64] 2023-07-06 18:10:33,322 (trainer:732) INFO: 19epoch:train:1-100batch: iter_time=3.360, forward_time=0.134, loss_ctc=71.878, loss_att=52.487, acc=0.687, loss=58.305, backward_time=0.768, grad_norm=98.210, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.115, optim0_lr0=8.574e-05, train_time=8.830
+[gpua003:0/64] 2023-07-06 18:12:13,736 (trainer:732) INFO: 19epoch:train:101-200batch: iter_time=1.005e-04, forward_time=0.108, loss_ctc=73.951, loss_att=54.611, acc=0.685, loss=60.413, backward_time=0.752, grad_norm=97.472, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.113, optim0_lr0=8.571e-05, train_time=2.008
+[gpua003:0/64] 2023-07-06 18:13:54,017 (trainer:732) INFO: 19epoch:train:201-300batch: iter_time=9.643e-05, forward_time=0.108, loss_ctc=71.382, loss_att=53.617, acc=0.688, loss=58.947, backward_time=0.750, grad_norm=79.712, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.112, optim0_lr0=8.569e-05, train_time=2.005
+[gpua003:0/64] 2023-07-06 18:15:36,625 (trainer:732) INFO: 19epoch:train:301-400batch: iter_time=9.724e-05, forward_time=0.108, loss_ctc=75.527, loss_att=61.014, acc=0.673, loss=65.368, backward_time=0.757, grad_norm=94.346, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.113, optim0_lr0=8.566e-05, train_time=2.052
+[gpua003:0/64] 2023-07-06 18:17:17,089 (trainer:732) INFO: 19epoch:train:401-500batch: iter_time=9.887e-05, forward_time=0.109, loss_ctc=73.023, loss_att=58.274, acc=0.684, loss=62.699, backward_time=0.751, grad_norm=90.553, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.113, optim0_lr0=8.564e-05, train_time=2.009
+[gpua003:0/64] 2023-07-06 18:19:01,828 (trainer:732) INFO: 19epoch:train:501-600batch: iter_time=8.940e-05, forward_time=0.108, loss_ctc=71.310, loss_att=57.750, acc=0.697, loss=61.818, backward_time=0.756, grad_norm=100.266, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.113, optim0_lr0=8.561e-05, train_time=2.095
+[gpua003:0/64] 2023-07-06 18:21:04,708 (trainer:732) INFO: 19epoch:train:601-700batch: iter_time=9.789e-05, forward_time=0.110, loss_ctc=68.405, loss_att=52.910, acc=0.711, loss=57.558, backward_time=0.796, grad_norm=82.647, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.112, optim0_lr0=8.559e-05, train_time=2.457
+[gpua003:0/64] 2023-07-06 18:22:58,725 (trainer:732) INFO: 19epoch:train:701-800batch: iter_time=1.057e-04, forward_time=0.111, loss_ctc=85.051, loss_att=69.937, acc=0.676, loss=74.471, backward_time=0.799, grad_norm=123.148, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.113, optim0_lr0=8.556e-05, train_time=2.280
+[gpua003:0/64] 2023-07-06 18:23:39,249 (multiple_iter_factory:32) INFO: Building 1th iter-factory...
+[gpua003:0/64] 2023-07-06 18:23:57,976 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua003:0/64] 2023-07-06 18:24:01,673 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"}
+ preprocess: )
+[gpua003:0/64] 2023-07-06 18:24:01,673 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2,
+[gpua003:0/64] 2023-07-06 18:24:01,679 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpua003:0/64] 2023-07-06 18:27:57,894 (trainer:732) INFO: 19epoch:train:801-900batch: iter_time=1.349, forward_time=0.109, loss_ctc=74.949, loss_att=56.631, acc=0.679, loss=62.126, backward_time=0.766, grad_norm=85.017, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.112, optim0_lr0=8.553e-05, train_time=5.983
+[gpua003:0/64] 2023-07-06 18:29:38,263 (trainer:732) INFO: 19epoch:train:901-1000batch: iter_time=1.127e-04, forward_time=0.108, loss_ctc=74.503, loss_att=54.158, acc=0.674, loss=60.262, backward_time=0.751, grad_norm=102.346, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.113, optim0_lr0=8.551e-05, train_time=2.007
+[gpua003:0/64] 2023-07-06 18:31:18,016 (trainer:732) INFO: 19epoch:train:1001-1100batch: iter_time=1.100e-04, forward_time=0.108, loss_ctc=74.767, loss_att=56.937, acc=0.684, loss=62.286, backward_time=0.752, grad_norm=82.548, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.113, optim0_lr0=8.548e-05, train_time=1.995
+[gpua003:0/64] 2023-07-06 18:32:57,536 (trainer:732) INFO: 19epoch:train:1101-1200batch: iter_time=1.159e-04, forward_time=0.107, loss_ctc=70.675, loss_att=55.035, acc=0.670, loss=59.727, backward_time=0.752, grad_norm=94.405, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.113, optim0_lr0=8.546e-05, train_time=1.990
+[gpua003:0/64] 2023-07-06 18:34:37,089 (trainer:732) INFO: 19epoch:train:1201-1300batch: iter_time=1.106e-04, forward_time=0.107, loss_ctc=76.324, loss_att=59.642, acc=0.686, loss=64.647, backward_time=0.751, grad_norm=93.883, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.112, optim0_lr0=8.544e-05, train_time=1.991
+[gpua003:0/64] 2023-07-06 18:36:17,028 (trainer:732) INFO: 19epoch:train:1301-1400batch: iter_time=9.751e-05, forward_time=0.109, loss_ctc=67.885, loss_att=54.884, acc=0.685, loss=58.784, backward_time=0.753, grad_norm=91.475, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.113, optim0_lr0=8.541e-05, train_time=1.999
+[gpua003:0/64] 2023-07-06 18:37:56,460 (trainer:732) INFO: 19epoch:train:1401-1500batch: iter_time=1.118e-04, forward_time=0.107, loss_ctc=66.717, loss_att=53.410, acc=0.694, loss=57.402, backward_time=0.750, grad_norm=80.970, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.113, optim0_lr0=8.539e-05, train_time=1.988
+[gpua003:0/64] 2023-07-06 18:39:36,031 (trainer:732) INFO: 19epoch:train:1501-1600batch: iter_time=8.864e-05, forward_time=0.108, loss_ctc=81.647, loss_att=67.344, acc=0.685, loss=71.635, backward_time=0.750, grad_norm=114.722, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.113, optim0_lr0=8.536e-05, train_time=1.991
+[gpua003:0/64] 2023-07-06 18:40:44,261 (multiple_iter_factory:32) INFO: Building 2th iter-factory...
+[gpua003:0/64] 2023-07-06 18:41:03,688 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua003:0/64] 2023-07-06 18:41:07,475 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"}
+ preprocess: )
+[gpua003:0/64] 2023-07-06 18:41:07,475 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8,
+[gpua003:0/64] 2023-07-06 18:41:07,482 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpua003:0/64] 2023-07-06 18:45:41,292 (trainer:732) INFO: 19epoch:train:1601-1700batch: iter_time=1.309, forward_time=0.108, loss_ctc=77.277, loss_att=60.983, acc=0.668, loss=65.871, backward_time=0.768, grad_norm=93.748, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.113, optim0_lr0=8.534e-05, train_time=7.305
+[gpua003:0/64] 2023-07-06 18:47:21,491 (trainer:732) INFO: 19epoch:train:1701-1800batch: iter_time=1.143e-04, forward_time=0.109, loss_ctc=73.968, loss_att=51.681, acc=0.683, loss=58.367, backward_time=0.753, grad_norm=97.133, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.113, optim0_lr0=8.531e-05, train_time=2.004
+[gpua003:0/64] 2023-07-06 18:49:01,195 (trainer:732) INFO: 19epoch:train:1801-1900batch: iter_time=1.050e-04, forward_time=0.108, loss_ctc=73.852, loss_att=56.822, acc=0.683, loss=61.931, backward_time=0.751, grad_norm=95.195, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.113, optim0_lr0=8.529e-05, train_time=1.994
+[gpua003:0/64] 2023-07-06 18:50:40,963 (trainer:732) INFO: 19epoch:train:1901-2000batch: iter_time=1.119e-04, forward_time=0.109, loss_ctc=68.398, loss_att=50.868, acc=0.690, loss=56.127, backward_time=0.752, grad_norm=82.918, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.113, optim0_lr0=8.526e-05, train_time=1.995
+[gpua003:0/64] 2023-07-06 18:52:20,494 (trainer:732) INFO: 19epoch:train:2001-2100batch: iter_time=1.182e-04, forward_time=0.108, loss_ctc=72.453, loss_att=55.434, acc=0.682, loss=60.540, backward_time=0.750, grad_norm=91.971, clip=100.000, loss_scale=5.765e+17, optim_step_time=0.113, optim0_lr0=8.524e-05, train_time=1.990
+[gpua003:0/64] 2023-07-06 18:54:00,169 (trainer:732) INFO: 19epoch:train:2101-2200batch: iter_time=1.126e-04, forward_time=0.109, loss_ctc=70.471, loss_att=60.656, acc=0.681, loss=63.600, backward_time=0.752, grad_norm=92.734, clip=100.000, loss_scale=5.765e+17, optim_step_time=0.113, optim0_lr0=8.521e-05, train_time=1.993
+[gpua003:0/64] 2023-07-06 18:55:43,499 (trainer:732) INFO: 19epoch:train:2201-2300batch: iter_time=1.184e-04, forward_time=0.109, loss_ctc=70.887, loss_att=55.798, acc=0.691, loss=60.325, backward_time=0.753, grad_norm=87.370, clip=100.000, loss_scale=5.765e+17, optim_step_time=0.113, optim0_lr0=8.519e-05, train_time=2.066
+[gpua003:0/64] 2023-07-06 18:57:28,468 (trainer:732) INFO: 19epoch:train:2301-2400batch: iter_time=1.142e-04, forward_time=0.108, loss_ctc=74.302, loss_att=62.084, acc=0.687, loss=65.750, backward_time=0.757, grad_norm=90.024, clip=100.000, loss_scale=5.765e+17, optim_step_time=0.113, optim0_lr0=8.516e-05, train_time=2.099
+[gpua003:0/64] 2023-07-06 18:59:09,045 (trainer:732) INFO: 19epoch:train:2401-2500batch: iter_time=9.995e-05, forward_time=0.109, loss_ctc=83.902, loss_att=62.709, acc=0.676, loss=69.067, backward_time=0.757, grad_norm=139.737, clip=100.000, loss_scale=5.765e+17, optim_step_time=0.113, optim0_lr0=8.514e-05, train_time=2.011
+[gpua003:0/64] 2023-07-06 18:59:11,328 (multiple_iter_factory:32) INFO: Building 3th iter-factory...
+[gpua003:0/64] 2023-07-06 18:59:30,666 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua003:0/64] 2023-07-06 18:59:34,430 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"}
+ preprocess: )
+[gpua003:0/64] 2023-07-06 18:59:34,430 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10,
+[gpua003:0/64] 2023-07-06 18:59:34,436 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpua003:0/64] 2023-07-06 19:05:02,322 (trainer:732) INFO: 19epoch:train:2501-2600batch: iter_time=1.326, forward_time=0.108, loss_ctc=70.364, loss_att=51.755, acc=0.683, loss=57.338, backward_time=0.793, grad_norm=99.243, clip=100.000, loss_scale=5.765e+17, optim_step_time=0.113, optim0_lr0=8.511e-05, train_time=7.065
+[gpua003:0/64] 2023-07-06 19:06:42,318 (trainer:732) INFO: 19epoch:train:2601-2700batch: iter_time=1.063e-04, forward_time=0.108, loss_ctc=73.763, loss_att=53.729, acc=0.683, loss=59.739, backward_time=0.752, grad_norm=99.427, clip=100.000, loss_scale=5.765e+17, optim_step_time=0.113, optim0_lr0=8.509e-05, train_time=2.000
+[gpua003:0/64] 2023-07-06 19:08:21,986 (trainer:732) INFO: 19epoch:train:2701-2800batch: iter_time=9.496e-05, forward_time=0.107, loss_ctc=69.928, loss_att=54.179, acc=0.687, loss=58.904, backward_time=0.752, grad_norm=87.933, clip=100.000, loss_scale=5.765e+17, optim_step_time=0.113, optim0_lr0=8.506e-05, train_time=1.993
+[gpua003:0/64] 2023-07-06 19:10:01,778 (trainer:732) INFO: 19epoch:train:2801-2900batch: iter_time=9.847e-05, forward_time=0.108, loss_ctc=70.824, loss_att=55.649, acc=0.683, loss=60.201, backward_time=0.753, grad_norm=104.518, clip=100.000, loss_scale=5.765e+17, optim_step_time=0.113, optim0_lr0=8.504e-05, train_time=1.996
+[gpua003:0/64] 2023-07-06 19:11:43,478 (trainer:732) INFO: 19epoch:train:2901-3000batch: iter_time=8.790e-05, forward_time=0.108, loss_ctc=71.744, loss_att=56.572, acc=0.683, loss=61.124, backward_time=0.753, grad_norm=97.646, clip=100.000, loss_scale=5.765e+17, optim_step_time=0.113, optim0_lr0=8.501e-05, train_time=2.034
+[gpua003:0/64] 2023-07-06 19:13:23,321 (trainer:732) INFO: 19epoch:train:3001-3100batch: iter_time=8.518e-05, forward_time=0.108, loss_ctc=68.969, loss_att=56.135, acc=0.697, loss=59.985, backward_time=0.752, grad_norm=93.645, clip=100.000, loss_scale=5.765e+17, optim_step_time=0.113, optim0_lr0=8.499e-05, train_time=1.997
+[gpua003:0/64] 2023-07-06 19:15:02,845 (trainer:732) INFO: 19epoch:train:3101-3200batch: iter_time=9.418e-05, forward_time=0.107, loss_ctc=66.684, loss_att=49.410, acc=0.712, loss=54.592, backward_time=0.751, grad_norm=86.075, clip=100.000, loss_scale=5.765e+17, optim_step_time=0.113, optim0_lr0=8.496e-05, train_time=1.990
+[gpua003:0/64] 2023-07-06 19:16:42,465 (trainer:732) INFO: 19epoch:train:3201-3300batch: iter_time=1.012e-04, forward_time=0.108, loss_ctc=83.199, loss_att=70.634, acc=0.670, loss=74.404, backward_time=0.751, grad_norm=111.083, clip=100.000, loss_scale=5.765e+17, optim_step_time=0.113, optim0_lr0=8.494e-05, train_time=1.992
+[gpua003:0/64] 2023-07-06 19:17:18,787 (multiple_iter_factory:32) INFO: Building 4th iter-factory...
+[gpua003:0/64] 2023-07-06 19:17:38,120 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua003:0/64] 2023-07-06 19:17:41,665 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"}
+ preprocess: )
+[gpua003:0/64] 2023-07-06 19:17:41,665 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6,
+[gpua003:0/64] 2023-07-06 19:17:41,672 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpua003:0/64] 2023-07-06 19:23:47,034 (trainer:732) INFO: 19epoch:train:3301-3400batch: iter_time=3.115, forward_time=0.165, loss_ctc=73.481, loss_att=53.362, acc=0.685, loss=59.397, backward_time=0.768, grad_norm=89.020, clip=100.000, loss_scale=5.765e+17, optim_step_time=0.115, optim0_lr0=8.492e-05, train_time=8.491
+[gpua003:0/64] 2023-07-06 19:25:27,393 (trainer:732) INFO: 19epoch:train:3401-3500batch: iter_time=1.015e-04, forward_time=0.109, loss_ctc=74.841, loss_att=53.947, acc=0.678, loss=60.215, backward_time=0.753, grad_norm=92.488, clip=100.000, loss_scale=5.765e+17, optim_step_time=0.113, optim0_lr0=8.489e-05, train_time=2.007
+[gpua003:0/64] 2023-07-06 19:27:17,900 (trainer:732) INFO: 19epoch:train:3501-3600batch: iter_time=8.651e-05, forward_time=0.108, loss_ctc=73.019, loss_att=56.463, acc=0.684, loss=61.429, backward_time=0.762, grad_norm=97.312, clip=100.000, loss_scale=5.765e+17, optim_step_time=0.113, optim0_lr0=8.487e-05, train_time=2.210
+[gpua003:0/64] 2023-07-06 19:28:57,771 (trainer:732) INFO: 19epoch:train:3601-3700batch: iter_time=9.262e-05, forward_time=0.108, loss_ctc=70.917, loss_att=54.082, acc=0.678, loss=59.133, backward_time=0.753, grad_norm=99.327, clip=100.000, loss_scale=5.765e+17, optim_step_time=0.113, optim0_lr0=8.484e-05, train_time=1.997
+[gpua003:0/64] 2023-07-06 19:30:37,491 (trainer:732) INFO: 19epoch:train:3701-3800batch: iter_time=1.079e-04, forward_time=0.107, loss_ctc=73.384, loss_att=57.937, acc=0.690, loss=62.571, backward_time=0.750, grad_norm=93.510, clip=100.000, loss_scale=5.765e+17, optim_step_time=0.113, optim0_lr0=8.482e-05, train_time=1.994
+[gpua003:0/64] 2023-07-06 19:32:17,208 (trainer:732) INFO: 19epoch:train:3801-3900batch: iter_time=1.142e-04,
forward_time=0.108, loss_ctc=67.881, loss_att=55.385, acc=0.682, loss=59.133, backward_time=0.751, grad_norm=104.375, clip=100.000, loss_scale=5.765e+17, optim_step_time=0.113, optim0_lr0=8.479e-05, train_time=1.994 +[gpua003:0/64] 2023-07-06 19:33:56,953 (trainer:732) INFO: 19epoch:train:3901-4000batch: iter_time=1.101e-04, forward_time=0.108, loss_ctc=66.750, loss_att=52.965, acc=0.698, loss=57.101, backward_time=0.752, grad_norm=101.994, clip=100.000, loss_scale=5.765e+17, optim_step_time=0.113, optim0_lr0=8.477e-05, train_time=1.995 +[gpua003:0/64] 2023-07-06 19:35:36,807 (trainer:732) INFO: 19epoch:train:4001-4100batch: iter_time=9.853e-05, forward_time=0.108, loss_ctc=78.586, loss_att=65.422, acc=0.693, loss=69.371, backward_time=0.753, grad_norm=90.671, clip=100.000, loss_scale=5.765e+17, optim_step_time=0.113, optim0_lr0=8.475e-05, train_time=1.997 +[gpua003:0/64] 2023-07-06 19:36:46,521 (multiple_iter_factory:32) INFO: Building 5th iter-factory... +[gpua003:0/64] 2023-07-06 19:37:05,501 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-06 19:37:09,017 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-06 19:37:09,017 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1, +[gpua003:0/64] 2023-07-06 19:37:09,023 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-06 19:42:01,856 (trainer:732) INFO: 19epoch:train:4101-4200batch: iter_time=1.271, forward_time=0.108, loss_ctc=77.470, loss_att=59.834, acc=0.674, loss=65.125, backward_time=0.764, grad_norm=104.225, clip=100.000, loss_scale=5.765e+17, optim_step_time=0.113, optim0_lr0=8.472e-05, train_time=7.701 +[gpua003:0/64] 2023-07-06 19:43:42,839 (trainer:732) INFO: 19epoch:train:4201-4300batch: iter_time=9.875e-05, forward_time=0.108, loss_ctc=72.007, loss_att=52.479, acc=0.697, loss=58.337, backward_time=0.754, grad_norm=104.085, clip=100.000, loss_scale=5.765e+17, optim_step_time=0.112, optim0_lr0=8.470e-05, train_time=2.019 +[gpua003:0/64] 2023-07-06 19:45:25,413 (trainer:732) INFO: 19epoch:train:4301-4400batch: iter_time=8.662e-05, forward_time=0.108, loss_ctc=75.337, loss_att=58.219, acc=0.688, loss=63.354, backward_time=0.759, grad_norm=99.286, clip=100.000, loss_scale=5.765e+17, optim_step_time=0.113, optim0_lr0=8.467e-05, train_time=2.051 +[gpua003:0/64] 2023-07-06 19:47:05,267 (trainer:732) INFO: 19epoch:train:4401-4500batch: iter_time=1.128e-04, forward_time=0.108, loss_ctc=68.687, loss_att=50.921, acc=0.699, loss=56.251, backward_time=0.752, grad_norm=83.793, clip=100.000, loss_scale=5.765e+17, optim_step_time=0.112, optim0_lr0=8.465e-05, train_time=1.997 +[gpua003:0/64] 2023-07-06 19:48:45,031 (trainer:732) INFO: 19epoch:train:4501-4600batch: iter_time=1.138e-04, forward_time=0.109, loss_ctc=66.702, loss_att=53.486, acc=0.690, loss=57.451, backward_time=0.751, grad_norm=87.055, clip=100.000, loss_scale=5.765e+17, optim_step_time=0.112, optim0_lr0=8.462e-05, 
train_time=1.995 +[gpua003:0/64] 2023-07-06 19:50:28,601 (trainer:732) INFO: 19epoch:train:4601-4700batch: iter_time=1.079e-04, forward_time=0.108, loss_ctc=72.286, loss_att=61.308, acc=0.686, loss=64.601, backward_time=0.756, grad_norm=99.764, clip=100.000, loss_scale=5.765e+17, optim_step_time=0.112, optim0_lr0=8.460e-05, train_time=2.071 +[gpua003:0/64] 2023-07-06 19:52:08,198 (trainer:732) INFO: 19epoch:train:4701-4800batch: iter_time=1.036e-04, forward_time=0.108, loss_ctc=71.867, loss_att=54.172, acc=0.705, loss=59.481, backward_time=0.750, grad_norm=109.482, clip=100.000, loss_scale=5.765e+17, optim_step_time=0.112, optim0_lr0=8.458e-05, train_time=1.992 +[gpua003:0/64] 2023-07-06 19:53:47,911 (trainer:732) INFO: 19epoch:train:4801-4900batch: iter_time=1.030e-04, forward_time=0.108, loss_ctc=73.498, loss_att=60.646, acc=0.710, loss=64.501, backward_time=0.751, grad_norm=132.193, clip=100.000, loss_scale=5.765e+17, optim_step_time=0.112, optim0_lr0=8.455e-05, train_time=1.994 +[gpua003:0/64] 2023-07-06 19:55:27,532 (trainer:732) INFO: 19epoch:train:4901-5000batch: iter_time=1.088e-04, forward_time=0.108, loss_ctc=81.536, loss_att=64.509, acc=0.679, loss=69.617, backward_time=0.751, grad_norm=111.664, clip=100.000, loss_scale=5.765e+17, optim_step_time=0.112, optim0_lr0=8.453e-05, train_time=1.992 +[gpua003:0/64] 2023-07-06 19:55:30,048 (multiple_iter_factory:32) INFO: Building 6th iter-factory... +[gpua003:0/64] 2023-07-06 19:55:48,920 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-06 19:55:52,427 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-06 19:55:52,427 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9, +[gpua003:0/64] 2023-07-06 19:55:52,433 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-06 20:00:40,776 (trainer:732) INFO: 19epoch:train:5001-5100batch: iter_time=1.366, forward_time=0.108, loss_ctc=69.849, loss_att=51.729, acc=0.695, loss=57.165, backward_time=0.763, grad_norm=87.510, clip=100.000, loss_scale=5.765e+17, optim_step_time=0.112, optim0_lr0=8.450e-05, train_time=6.265 +[gpua003:0/64] 2023-07-06 20:02:20,937 (trainer:732) INFO: 19epoch:train:5101-5200batch: iter_time=9.291e-05, forward_time=0.108, loss_ctc=70.810, loss_att=51.826, acc=0.701, loss=57.521, backward_time=0.752, grad_norm=89.983, clip=100.000, loss_scale=5.765e+17, optim_step_time=0.112, optim0_lr0=8.448e-05, train_time=2.003 +[gpua003:0/64] 2023-07-06 20:04:03,241 (trainer:732) INFO: 19epoch:train:5201-5300batch: iter_time=9.771e-05, forward_time=0.109, loss_ctc=70.882, loss_att=52.510, acc=0.700, loss=58.021, backward_time=0.755, grad_norm=78.370, clip=100.000, loss_scale=5.765e+17, optim_step_time=0.112, optim0_lr0=8.445e-05, train_time=2.046 +[gpua003:0/64] 2023-07-06 20:05:51,446 (trainer:732) INFO: 19epoch:train:5301-5400batch: iter_time=9.570e-05, forward_time=0.107, loss_ctc=69.962, loss_att=57.223, acc=0.687, loss=61.045, 
backward_time=0.760, grad_norm=91.273, clip=100.000, loss_scale=5.765e+17, optim_step_time=0.112, optim0_lr0=8.443e-05, train_time=2.164 +[gpua003:0/64] 2023-07-06 20:07:31,682 (trainer:732) INFO: 19epoch:train:5401-5500batch: iter_time=9.179e-05, forward_time=0.107, loss_ctc=70.911, loss_att=55.955, acc=0.689, loss=60.442, backward_time=0.751, grad_norm=91.242, clip=100.000, loss_scale=5.765e+17, optim_step_time=0.112, optim0_lr0=8.441e-05, train_time=2.005 +[gpua003:0/64] 2023-07-06 20:09:13,295 (trainer:732) INFO: 19epoch:train:5501-5600batch: iter_time=9.333e-05, forward_time=0.107, loss_ctc=72.418, loss_att=59.104, acc=0.701, loss=63.098, backward_time=0.754, grad_norm=97.947, clip=100.000, loss_scale=5.765e+17, optim_step_time=0.112, optim0_lr0=8.438e-05, train_time=2.032 +[gpua003:0/64] 2023-07-06 20:10:53,158 (trainer:732) INFO: 19epoch:train:5601-5700batch: iter_time=9.882e-05, forward_time=0.107, loss_ctc=66.916, loss_att=50.106, acc=0.718, loss=55.149, backward_time=0.751, grad_norm=81.033, clip=100.000, loss_scale=5.765e+17, optim_step_time=0.112, optim0_lr0=8.436e-05, train_time=1.997 +[gpua003:0/64] 2023-07-06 20:12:34,924 (trainer:732) INFO: 19epoch:train:5701-5800batch: iter_time=9.385e-05, forward_time=0.108, loss_ctc=82.933, loss_att=68.684, acc=0.688, loss=72.959, backward_time=0.756, grad_norm=95.394, clip=100.000, loss_scale=5.765e+17, optim_step_time=0.112, optim0_lr0=8.433e-05, train_time=2.035 +[gpua003:0/64] 2023-07-06 20:13:11,362 (multiple_iter_factory:32) INFO: Building 7th iter-factory... +[gpua003:0/64] 2023-07-06 20:13:30,240 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-06 20:13:33,729 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-06 20:13:33,729 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7, +[gpua003:0/64] 2023-07-06 20:13:33,736 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-06 20:17:25,315 (trainer:732) INFO: 19epoch:train:5801-5900batch: iter_time=1.332, forward_time=0.109, loss_ctc=72.517, loss_att=53.535, acc=0.694, loss=59.230, backward_time=0.764, grad_norm=91.357, clip=100.000, loss_scale=5.765e+17, optim_step_time=0.113, optim0_lr0=8.431e-05, train_time=5.808 +[gpua003:0/64] 2023-07-06 20:19:05,549 (trainer:732) INFO: 19epoch:train:5901-6000batch: iter_time=9.979e-05, forward_time=0.108, loss_ctc=72.968, loss_att=53.312, acc=0.689, loss=59.209, backward_time=0.753, grad_norm=105.818, clip=100.000, loss_scale=5.765e+17, optim_step_time=0.113, optim0_lr0=8.429e-05, train_time=2.004 +[gpua003:0/64] 2023-07-06 20:20:45,322 (trainer:732) INFO: 19epoch:train:6001-6100batch: iter_time=1.047e-04, forward_time=0.109, loss_ctc=72.912, loss_att=54.448, acc=0.697, loss=59.987, backward_time=0.751, grad_norm=86.049, clip=100.000, loss_scale=1.153e+18, optim_step_time=0.112, optim0_lr0=8.426e-05, train_time=1.995 +[gpua003:0/64] 2023-07-06 20:22:41,206 (trainer:732) INFO: 
19epoch:train:6101-6200batch: iter_time=9.789e-05, forward_time=0.115, loss_ctc=67.667, loss_att=52.369, acc=0.694, loss=56.958, backward_time=0.773, grad_norm=79.432, clip=100.000, loss_scale=1.153e+18, optim_step_time=0.114, optim0_lr0=8.424e-05, train_time=2.317 +[gpua003:0/64] 2023-07-06 20:24:21,811 (trainer:732) INFO: 19epoch:train:6201-6300batch: iter_time=1.002e-04, forward_time=0.109, loss_ctc=72.569, loss_att=58.922, acc=0.692, loss=63.016, backward_time=0.756, grad_norm=114.121, clip=100.000, loss_scale=1.153e+18, optim_step_time=0.113, optim0_lr0=8.421e-05, train_time=2.012 +[gpua003:0/64] 2023-07-06 20:26:02,019 (trainer:732) INFO: 19epoch:train:6301-6400batch: iter_time=1.042e-04, forward_time=0.110, loss_ctc=67.317, loss_att=54.258, acc=0.696, loss=58.176, backward_time=0.753, grad_norm=86.928, clip=100.000, loss_scale=1.153e+18, optim_step_time=0.113, optim0_lr0=8.419e-05, train_time=2.004 +[gpua003:0/64] 2023-07-06 20:27:41,637 (trainer:732) INFO: 19epoch:train:6401-6500batch: iter_time=1.161e-04, forward_time=0.108, loss_ctc=66.587, loss_att=54.003, acc=0.706, loss=57.778, backward_time=0.750, grad_norm=93.696, clip=100.000, loss_scale=1.153e+18, optim_step_time=0.112, optim0_lr0=8.417e-05, train_time=1.992 +[gpua003:0/64] 2023-07-06 20:29:21,369 (trainer:732) INFO: 19epoch:train:6501-6600batch: iter_time=1.061e-04, forward_time=0.109, loss_ctc=78.539, loss_att=62.535, acc=0.711, loss=67.336, backward_time=0.751, grad_norm=87.737, clip=100.000, loss_scale=1.153e+18, optim_step_time=0.112, optim0_lr0=8.414e-05, train_time=1.994 +[gpua003:0/64] 2023-07-06 20:30:33,131 (multiple_iter_factory:32) INFO: Building 8th iter-factory... +[gpua003:0/64] 2023-07-06 20:30:52,251 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-06 20:30:55,813 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-06 20:30:55,813 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5, +[gpua003:0/64] 2023-07-06 20:30:55,820 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-06 20:35:42,565 (trainer:732) INFO: 19epoch:train:6601-6700batch: iter_time=1.437, forward_time=0.110, loss_ctc=76.173, loss_att=59.992, acc=0.683, loss=64.847, backward_time=0.771, grad_norm=92.415, clip=100.000, loss_scale=1.153e+18, optim_step_time=0.113, optim0_lr0=8.412e-05, train_time=7.624 +[gpua003:0/64] 2023-07-06 20:37:35,033 (trainer:732) INFO: 19epoch:train:6701-6800batch: iter_time=2.117e-04, forward_time=0.111, loss_ctc=69.258, loss_att=50.177, acc=0.703, loss=55.902, backward_time=0.765, grad_norm=87.979, clip=100.000, loss_scale=1.153e+18, optim_step_time=0.113, optim0_lr0=8.410e-05, train_time=2.249 +[gpua003:0/64] 2023-07-06 20:39:33,155 (trainer:732) INFO: 19epoch:train:6801-6900batch: iter_time=8.933e-05, forward_time=0.110, loss_ctc=76.772, loss_att=57.481, acc=0.692, loss=63.268, backward_time=0.809, grad_norm=113.312, clip=100.000, loss_scale=1.153e+18, 
optim_step_time=0.113, optim0_lr0=8.407e-05, train_time=2.362 +[gpua003:0/64] 2023-07-06 20:41:18,407 (trainer:732) INFO: 19epoch:train:6901-7000batch: iter_time=8.764e-05, forward_time=0.107, loss_ctc=69.838, loss_att=51.828, acc=0.699, loss=57.231, backward_time=0.770, grad_norm=80.826, clip=100.000, loss_scale=1.153e+18, optim_step_time=0.112, optim0_lr0=8.405e-05, train_time=2.105 +[gpua003:0/64] 2023-07-06 20:43:01,811 (trainer:732) INFO: 19epoch:train:7001-7100batch: iter_time=8.895e-05, forward_time=0.108, loss_ctc=65.767, loss_att=52.168, acc=0.697, loss=56.247, backward_time=0.757, grad_norm=105.272, clip=100.000, loss_scale=1.153e+18, optim_step_time=0.112, optim0_lr0=8.402e-05, train_time=2.068 +[gpua003:0/64] 2023-07-06 20:44:41,509 (trainer:732) INFO: 19epoch:train:7101-7200batch: iter_time=9.980e-05, forward_time=0.108, loss_ctc=71.601, loss_att=60.582, acc=0.689, loss=63.888, backward_time=0.751, grad_norm=87.586, clip=100.000, loss_scale=1.153e+18, optim_step_time=0.112, optim0_lr0=8.400e-05, train_time=1.994 +[gpua003:0/64] 2023-07-06 20:46:21,090 (trainer:732) INFO: 19epoch:train:7201-7300batch: iter_time=1.134e-04, forward_time=0.108, loss_ctc=70.650, loss_att=53.644, acc=0.707, loss=58.745, backward_time=0.750, grad_norm=87.476, clip=100.000, loss_scale=1.153e+18, optim_step_time=0.112, optim0_lr0=8.398e-05, train_time=1.991 +[gpua003:0/64] 2023-07-06 20:48:02,044 (trainer:732) INFO: 19epoch:train:7301-7400batch: iter_time=8.952e-05, forward_time=0.108, loss_ctc=72.244, loss_att=60.016, acc=0.710, loss=63.685, backward_time=0.752, grad_norm=87.451, clip=100.000, loss_scale=1.153e+18, optim_step_time=0.112, optim0_lr0=8.395e-05, train_time=2.019 +[gpua003:0/64] 2023-07-06 20:49:41,828 (trainer:732) INFO: 19epoch:train:7401-7500batch: iter_time=8.351e-05, forward_time=0.108, loss_ctc=78.571, loss_att=61.691, acc=0.689, loss=66.755, backward_time=0.751, grad_norm=104.440, clip=100.000, loss_scale=1.153e+18, optim_step_time=0.112, optim0_lr0=8.393e-05, train_time=1.995 +[gpua003:0/64] 2023-07-06 20:49:52,973 (multiple_iter_factory:32) INFO: Building 9th iter-factory... 
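# Editor's note on the loss columns: every progress line above reports loss_ctc,
# loss_att, and a combined loss. The combined value is consistent with a hybrid
# CTC/attention objective, loss = w*loss_ctc + (1-w)*loss_att with w = 0.3. The
# authoritative ctc_weight lives in the YAML passed via --config, which is not
# reproduced in this log, so w = 0.3 is an inference from the numbers, not a
# quoted setting. A minimal, self-contained Python check against two entries
# copied from the log above:
pairs = [
    # (loss_ctc, loss_att, loss) copied verbatim from entries above
    (74.949, 56.631, 62.126),  # 19epoch:train:801-900batch
    (78.571, 61.691, 66.755),  # 19epoch:train:7401-7500batch
]
w = 0.3  # assumed CTC weight, inferred from the reported numbers
for ctc, att, total in pairs:
    combined = w * ctc + (1.0 - w) * att
    assert abs(combined - total) < 5e-3, (combined, total)
print("loss == 0.3*loss_ctc + 0.7*loss_att for the sampled entries")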
+[gpua003:0/64] 2023-07-06 20:50:12,449 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-06 20:50:15,993 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-06 20:50:15,993 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0, +[gpua003:0/64] 2023-07-06 20:50:15,999 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-06 20:54:12,385 (trainer:732) INFO: 19epoch:train:7501-7600batch: iter_time=1.571, forward_time=0.130, loss_ctc=72.069, loss_att=54.503, acc=0.682, loss=59.773, backward_time=0.763, grad_norm=94.845, clip=100.000, loss_scale=1.153e+18, optim_step_time=0.114, optim0_lr0=8.391e-05, train_time=5.411 +[gpua003:0/64] 2023-07-06 20:55:52,369 (trainer:732) INFO: 19epoch:train:7601-7700batch: iter_time=9.053e-05, forward_time=0.107, loss_ctc=69.256, loss_att=49.020, acc=0.700, loss=55.091, backward_time=0.751, grad_norm=86.882, clip=100.000, loss_scale=1.153e+18, optim_step_time=0.112, optim0_lr0=8.388e-05, train_time=1.999 +[gpua003:0/64] 2023-07-06 20:57:32,499 (trainer:732) INFO: 19epoch:train:7701-7800batch: iter_time=8.971e-05, forward_time=0.107, loss_ctc=71.084, loss_att=55.278, acc=0.687, loss=60.020, backward_time=0.750, grad_norm=85.779, clip=100.000, loss_scale=1.153e+18, optim_step_time=0.112, optim0_lr0=8.386e-05, train_time=2.002 +[gpua003:0/64] 2023-07-06 20:59:12,329 (trainer:732) INFO: 19epoch:train:7801-7900batch: iter_time=1.121e-04, forward_time=0.109, loss_ctc=69.918, loss_att=55.616, acc=0.691, loss=59.906, backward_time=0.752, grad_norm=90.907, clip=100.000, loss_scale=1.153e+18, optim_step_time=0.112, optim0_lr0=8.383e-05, train_time=1.996 +[gpua003:0/64] 2023-07-06 21:00:52,131 (trainer:732) INFO: 19epoch:train:7901-8000batch: iter_time=1.091e-04, forward_time=0.110, loss_ctc=72.031, loss_att=58.828, acc=0.687, loss=62.788, backward_time=0.752, grad_norm=90.990, clip=100.000, loss_scale=1.153e+18, optim_step_time=0.112, optim0_lr0=8.381e-05, train_time=1.996 +[gpua003:0/64] 2023-07-06 21:02:31,776 (trainer:732) INFO: 19epoch:train:8001-8100batch: iter_time=1.210e-04, forward_time=0.109, loss_ctc=69.895, loss_att=59.175, acc=0.679, loss=62.391, backward_time=0.751, grad_norm=96.311, clip=100.000, loss_scale=1.153e+18, optim_step_time=0.112, optim0_lr0=8.379e-05, train_time=1.993 +[gpua003:0/64] 2023-07-06 21:04:11,333 (trainer:732) INFO: 19epoch:train:8101-8200batch: iter_time=1.196e-04, forward_time=0.109, loss_ctc=65.075, loss_att=46.957, acc=0.713, loss=52.393, backward_time=0.752, grad_norm=79.989, clip=100.000, loss_scale=1.153e+18, optim_step_time=0.113, optim0_lr0=8.376e-05, train_time=1.991 +[gpua003:0/64] 2023-07-06 21:05:50,952 (trainer:732) INFO: 19epoch:train:8201-8300batch: iter_time=1.100e-04, forward_time=0.109, loss_ctc=83.512, loss_att=69.549, acc=0.682, loss=73.738, backward_time=0.752, grad_norm=92.984, clip=100.000, loss_scale=1.153e+18, optim_step_time=0.113, optim0_lr0=8.374e-05, 
train_time=1.992 +[gpua003:0/64] 2023-07-06 21:06:26,463 (multiple_iter_factory:32) INFO: Building 10th iter-factory... +[gpua003:0/64] 2023-07-06 21:06:45,859 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-06 21:06:49,639 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-06 21:06:49,639 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4, +[gpua003:0/64] 2023-07-06 21:06:49,645 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-06 21:10:21,900 (trainer:732) INFO: 19epoch:train:8301-8400batch: iter_time=1.289, forward_time=0.109, loss_ctc=73.988, loss_att=57.572, acc=0.681, loss=62.497, backward_time=0.774, grad_norm=92.924, clip=100.000, loss_scale=1.153e+18, optim_step_time=0.113, optim0_lr0=8.372e-05, train_time=5.419 +[gpua003:0/64] 2023-07-06 21:12:03,025 (trainer:732) INFO: 19epoch:train:8401-8500batch: iter_time=9.593e-05, forward_time=0.108, loss_ctc=71.684, loss_att=50.782, acc=0.691, loss=57.053, backward_time=0.753, grad_norm=91.974, clip=100.000, loss_scale=1.153e+18, optim_step_time=0.113, optim0_lr0=8.369e-05, train_time=2.022 +[gpua003:0/64] 2023-07-06 21:13:42,791 (trainer:732) INFO: 19epoch:train:8501-8600batch: iter_time=8.646e-05, forward_time=0.109, loss_ctc=74.130, loss_att=56.299, acc=0.692, loss=61.648, backward_time=0.753, grad_norm=87.929, clip=100.000, loss_scale=1.153e+18, optim_step_time=0.113, optim0_lr0=8.367e-05, train_time=1.995 +[gpua003:0/64] 2023-07-06 21:15:23,522 (trainer:732) INFO: 19epoch:train:8601-8700batch: iter_time=9.629e-05, forward_time=0.109, loss_ctc=68.342, loss_att=52.472, acc=0.685, loss=57.233, backward_time=0.753, grad_norm=89.721, clip=100.000, loss_scale=1.153e+18, optim_step_time=0.113, optim0_lr0=8.365e-05, train_time=2.014 +[gpua003:0/64] 2023-07-06 21:17:03,161 (trainer:732) INFO: 19epoch:train:8701-8800batch: iter_time=9.716e-05, forward_time=0.108, loss_ctc=72.982, loss_att=57.376, acc=0.697, loss=62.058, backward_time=0.751, grad_norm=113.686, clip=100.000, loss_scale=1.153e+18, optim_step_time=0.113, optim0_lr0=8.362e-05, train_time=1.993 +[gpua003:0/64] 2023-07-06 21:18:43,104 (trainer:732) INFO: 19epoch:train:8801-8900batch: iter_time=9.576e-05, forward_time=0.109, loss_ctc=66.077, loss_att=53.057, acc=0.696, loss=56.963, backward_time=0.753, grad_norm=81.859, clip=100.000, loss_scale=1.153e+18, optim_step_time=0.113, optim0_lr0=8.360e-05, train_time=1.999 +[gpua003:0/64] 2023-07-06 21:20:22,718 (trainer:732) INFO: 19epoch:train:8901-9000batch: iter_time=1.008e-04, forward_time=0.108, loss_ctc=65.904, loss_att=53.143, acc=0.700, loss=56.972, backward_time=0.751, grad_norm=84.308, clip=100.000, loss_scale=1.153e+18, optim_step_time=0.112, optim0_lr0=8.358e-05, train_time=1.992 +[gpua003:0/64] 2023-07-06 21:22:02,446 (trainer:732) INFO: 19epoch:train:9001-9100batch: iter_time=9.656e-05, forward_time=0.108, loss_ctc=77.892, loss_att=65.356, acc=0.694, loss=69.117, 
backward_time=0.752, grad_norm=101.891, clip=100.000, loss_scale=1.153e+18, optim_step_time=0.113, optim0_lr0=8.355e-05, train_time=1.994 +[gpua003:0/64] 2023-07-06 21:23:10,807 (multiple_iter_factory:32) INFO: Building 11th iter-factory... +[gpua003:0/64] 2023-07-06 21:23:29,796 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-06 21:23:33,339 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-06 21:23:33,339 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3, +[gpua003:0/64] 2023-07-06 21:23:33,345 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-06 21:27:24,475 (trainer:732) INFO: 19epoch:train:9101-9200batch: iter_time=1.327, forward_time=0.109, loss_ctc=75.034, loss_att=57.612, acc=0.680, loss=62.839, backward_time=0.762, grad_norm=88.253, clip=100.000, loss_scale=1.153e+18, optim_step_time=0.113, optim0_lr0=8.353e-05, train_time=6.440 +[gpua003:0/64] 2023-07-06 21:29:05,161 (trainer:732) INFO: 19epoch:train:9201-9300batch: iter_time=9.736e-05, forward_time=0.108, loss_ctc=71.238, loss_att=51.142, acc=0.705, loss=57.170, backward_time=0.754, grad_norm=81.302, clip=100.000, loss_scale=1.153e+18, optim_step_time=0.112, optim0_lr0=8.351e-05, train_time=2.013 +[gpua003:0/64] 2023-07-06 21:30:46,142 (trainer:732) INFO: 19epoch:train:9301-9400batch: iter_time=1.061e-04, forward_time=0.109, loss_ctc=74.369, loss_att=57.598, acc=0.690, loss=62.630, backward_time=0.752, grad_norm=101.798, clip=100.000, loss_scale=1.153e+18, optim_step_time=0.113, optim0_lr0=8.348e-05, train_time=2.019 +[gpua003:0/64] 2023-07-06 21:32:26,233 (trainer:732) INFO: 19epoch:train:9401-9500batch: iter_time=9.370e-05, forward_time=0.109, loss_ctc=67.835, loss_att=50.281, acc=0.706, loss=55.547, backward_time=0.752, grad_norm=88.722, clip=100.000, loss_scale=1.153e+18, optim_step_time=0.112, optim0_lr0=8.346e-05, train_time=2.002 +[gpua003:0/64] 2023-07-06 21:34:06,640 (trainer:732) INFO: 19epoch:train:9501-9600batch: iter_time=1.055e-04, forward_time=0.110, loss_ctc=64.836, loss_att=52.472, acc=0.696, loss=56.181, backward_time=0.753, grad_norm=90.882, clip=100.000, loss_scale=1.153e+18, optim_step_time=0.113, optim0_lr0=8.344e-05, train_time=2.008 +[gpua003:0/64] 2023-07-06 21:36:03,018 (trainer:732) INFO: 19epoch:train:9601-9700batch: iter_time=7.040e-04, forward_time=0.150, loss_ctc=70.591, loss_att=59.315, acc=0.695, loss=62.698, backward_time=0.779, grad_norm=92.334, clip=100.000, loss_scale=1.153e+18, optim_step_time=0.114, optim0_lr0=8.341e-05, train_time=2.327 +[gpua003:0/64] 2023-07-06 21:37:44,980 (trainer:732) INFO: 19epoch:train:9701-9800batch: iter_time=1.046e-04, forward_time=0.108, loss_ctc=69.523, loss_att=55.550, acc=0.707, loss=59.741, backward_time=0.753, grad_norm=96.147, clip=100.000, loss_scale=1.153e+18, optim_step_time=0.113, optim0_lr0=8.339e-05, train_time=2.039 +[gpua003:0/64] 2023-07-06 21:39:24,859 (trainer:732) INFO: 
19epoch:train:9801-9900batch: iter_time=1.053e-04, forward_time=0.110, loss_ctc=73.187, loss_att=59.236, acc=0.717, loss=63.421, backward_time=0.753, grad_norm=85.059, clip=100.000, loss_scale=1.153e+18, optim_step_time=0.113, optim0_lr0=8.337e-05, train_time=1.997 +[gpua003:0/64] 2023-07-06 21:41:04,508 (trainer:732) INFO: 19epoch:train:9901-10000batch: iter_time=9.273e-05, forward_time=0.108, loss_ctc=79.916, loss_att=59.698, acc=0.692, loss=65.764, backward_time=0.751, grad_norm=87.679, clip=100.000, loss_scale=1.153e+18, optim_step_time=0.112, optim0_lr0=8.334e-05, train_time=1.993 +[gpua003:0/64] 2023-07-06 21:53:26,999 (trainer:338) INFO: 19epoch results: [train] iter_time=0.201, forward_time=0.110, loss_ctc=72.442, loss_att=56.449, acc=0.691, loss=61.247, backward_time=0.757, grad_norm=94.422, clip=100.000, loss_scale=7.494e+17, optim_step_time=0.113, optim0_lr0=8.452e-05, train_time=2.614, time=3 hours, 38 minutes and 13.18 seconds, total_count=160000, gpu_max_cached_mem_GB=37.779, [valid] loss_ctc=50.578, cer_ctc=0.291, loss_att=43.395, acc=0.637, cer=0.442, wer=1.000, loss=45.550, time=5 minutes and 44.01 seconds, total_count=16698, gpu_max_cached_mem_GB=37.779, [att_plot] time=6 minutes and 18.47 seconds, total_count=0, gpu_max_cached_mem_GB=37.779 +[gpua003:0/64] 2023-07-06 21:53:45,891 (trainer:386) INFO: The best model has been updated: valid.total_count +[gpua003:0/64] 2023-07-06 21:53:45,932 (trainer:272) INFO: 20/100epoch started. Estimated time to finish: 1 week, 6 days and 1 hour +[gpua003:0/64] 2023-07-06 21:53:46,828 (multiple_iter_factory:32) INFO: Building 0th iter-factory... +[gpua003:0/64] 2023-07-06 21:54:05,845 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-06 21:54:10,947 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-06 21:54:10,947 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7, +[gpua003:0/64] 2023-07-06 21:54:11,034 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-06 22:01:50,792 (trainer:732) INFO: 20epoch:train:1-100batch: iter_time=3.782, forward_time=0.136, loss_ctc=71.854, loss_att=51.948, acc=0.710, loss=57.919, backward_time=0.767, grad_norm=91.560, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.114, optim0_lr0=8.332e-05, train_time=9.690 +[gpua003:0/64] 2023-07-06 22:03:30,984 (trainer:732) INFO: 20epoch:train:101-200batch: iter_time=9.746e-05, forward_time=0.108, loss_ctc=68.269, loss_att=50.073, acc=0.698, loss=55.532, backward_time=0.752, grad_norm=89.704, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.113, optim0_lr0=8.330e-05, train_time=2.004 +[gpua003:0/64] 2023-07-06 22:05:13,770 (trainer:732) INFO: 20epoch:train:201-300batch: iter_time=1.101e-04, forward_time=0.109, loss_ctc=76.182, loss_att=54.086, acc=0.680, loss=60.715, backward_time=0.754, grad_norm=92.398, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.113, optim0_lr0=8.327e-05, 
train_time=2.056 +[gpua003:0/64] 2023-07-06 22:06:54,574 (trainer:732) INFO: 20epoch:train:301-400batch: iter_time=1.038e-04, forward_time=0.107, loss_ctc=72.772, loss_att=56.293, acc=0.685, loss=61.237, backward_time=0.751, grad_norm=105.066, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.112, optim0_lr0=8.325e-05, train_time=2.016 +[gpua003:0/64] 2023-07-06 22:08:34,689 (trainer:732) INFO: 20epoch:train:401-500batch: iter_time=1.021e-04, forward_time=0.106, loss_ctc=75.522, loss_att=55.332, acc=0.684, loss=61.389, backward_time=0.749, grad_norm=97.945, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.112, optim0_lr0=8.323e-05, train_time=2.002 +[gpua003:0/64] 2023-07-06 22:10:14,287 (trainer:732) INFO: 20epoch:train:501-600batch: iter_time=1.003e-04, forward_time=0.107, loss_ctc=73.512, loss_att=56.609, acc=0.702, loss=61.680, backward_time=0.750, grad_norm=84.685, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.112, optim0_lr0=8.321e-05, train_time=1.992 +[gpua003:0/64] 2023-07-06 22:11:54,452 (trainer:732) INFO: 20epoch:train:601-700batch: iter_time=1.043e-04, forward_time=0.107, loss_ctc=78.698, loss_att=54.562, acc=0.686, loss=61.803, backward_time=0.750, grad_norm=98.035, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.113, optim0_lr0=8.318e-05, train_time=2.003 +[gpua003:0/64] 2023-07-06 22:13:40,823 (trainer:732) INFO: 20epoch:train:701-800batch: iter_time=1.096e-04, forward_time=0.108, loss_ctc=89.847, loss_att=64.589, acc=0.689, loss=72.167, backward_time=0.762, grad_norm=100.675, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.113, optim0_lr0=8.316e-05, train_time=2.127 +[gpua003:0/64] 2023-07-06 22:14:20,761 (multiple_iter_factory:32) INFO: Building 1th iter-factory... +[gpua003:0/64] 2023-07-06 22:14:39,405 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-06 22:14:43,119 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-06 22:14:43,119 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5, +[gpua003:0/64] 2023-07-06 22:14:43,125 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-06 22:18:32,284 (trainer:732) INFO: 20epoch:train:801-900batch: iter_time=1.362, forward_time=0.108, loss_ctc=71.198, loss_att=53.976, acc=0.705, loss=59.142, backward_time=0.768, grad_norm=80.563, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.112, optim0_lr0=8.314e-05, train_time=5.829 +[gpua003:0/64] 2023-07-06 22:20:13,201 (trainer:732) INFO: 20epoch:train:901-1000batch: iter_time=9.604e-05, forward_time=0.108, loss_ctc=67.658, loss_att=48.534, acc=0.705, loss=54.271, backward_time=0.755, grad_norm=86.115, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.112, optim0_lr0=8.311e-05, train_time=2.018 +[gpua003:0/64] 2023-07-06 22:21:53,069 (trainer:732) INFO: 20epoch:train:1001-1100batch: iter_time=1.027e-04, forward_time=0.108, loss_ctc=75.100, loss_att=54.061, acc=0.692, loss=60.372, 
backward_time=0.753, grad_norm=94.035, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.113, optim0_lr0=8.309e-05, train_time=1.997 +[gpua003:0/64] 2023-07-06 22:23:34,205 (trainer:732) INFO: 20epoch:train:1101-1200batch: iter_time=1.004e-04, forward_time=0.108, loss_ctc=75.994, loss_att=54.925, acc=0.686, loss=61.246, backward_time=0.754, grad_norm=94.166, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.113, optim0_lr0=8.307e-05, train_time=2.022 +[gpua003:0/64] 2023-07-06 22:25:14,385 (trainer:732) INFO: 20epoch:train:1201-1300batch: iter_time=9.688e-05, forward_time=0.108, loss_ctc=68.867, loss_att=52.161, acc=0.694, loss=57.173, backward_time=0.752, grad_norm=88.133, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.114, optim0_lr0=8.304e-05, train_time=2.003 +[gpua003:0/64] 2023-07-06 22:26:54,236 (trainer:732) INFO: 20epoch:train:1301-1400batch: iter_time=9.936e-05, forward_time=0.108, loss_ctc=74.280, loss_att=59.054, acc=0.691, loss=63.622, backward_time=0.753, grad_norm=81.828, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.113, optim0_lr0=8.302e-05, train_time=1.997 +[gpua003:0/64] 2023-07-06 22:28:34,189 (trainer:732) INFO: 20epoch:train:1401-1500batch: iter_time=9.366e-05, forward_time=0.108, loss_ctc=72.329, loss_att=51.203, acc=0.701, loss=57.541, backward_time=0.752, grad_norm=88.414, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.114, optim0_lr0=8.300e-05, train_time=1.999 +[gpua003:0/64] 2023-07-06 22:30:15,525 (trainer:732) INFO: 20epoch:train:1501-1600batch: iter_time=1.027e-04, forward_time=0.108, loss_ctc=91.703, loss_att=61.724, acc=0.683, loss=70.718, backward_time=0.754, grad_norm=106.713, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.114, optim0_lr0=8.298e-05, train_time=2.026 +[gpua003:0/64] 2023-07-06 22:31:22,752 (multiple_iter_factory:32) INFO: Building 2th iter-factory... 
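# Editor's note: the progress lines follow a fixed "key=value, key=value, ..."
# layout, so they can be post-processed without any ESPnet code. Below is a
# minimal, hypothetical parsing sketch; the sample line is copied verbatim
# (minus the leading diff '+') from the 20epoch:train:1501-1600batch entry
# above, and the regex is an assumption about this layout, not an ESPnet API.
import re

SAMPLE = ("[gpua003:0/64] 2023-07-06 22:30:15,525 (trainer:732) INFO: "
          "20epoch:train:1501-1600batch: iter_time=1.027e-04, forward_time=0.108, "
          "loss_ctc=91.703, loss_att=61.724, acc=0.683, loss=70.718, "
          "backward_time=0.754, grad_norm=106.713, clip=100.000, "
          "loss_scale=2.306e+18, optim_step_time=0.114, optim0_lr0=8.298e-05, "
          "train_time=2.026")

def parse_progress(line: str) -> dict:
    # Collect key=value pairs; values may use scientific notation (e.g. 2.306e+18).
    return {k: float(v) for k, v in re.findall(r"(\w+)=([\d.eE+-]+)", line)}

metrics = parse_progress(SAMPLE)
assert metrics["loss_ctc"] == 91.703 and metrics["grad_norm"] == 106.713
# One such dict per line is enough to tabulate or plot loss/acc across an epoch.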
+[gpua003:0/64] 2023-07-06 22:31:42,368 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-06 22:31:46,184 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-06 22:31:46,184 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6, +[gpua003:0/64] 2023-07-06 22:31:46,190 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-06 22:35:07,262 (trainer:732) INFO: 20epoch:train:1601-1700batch: iter_time=1.291, forward_time=0.108, loss_ctc=71.960, loss_att=56.558, acc=0.711, loss=61.178, backward_time=0.764, grad_norm=84.868, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.113, optim0_lr0=8.295e-05, train_time=5.835 +[gpua003:0/64] 2023-07-06 22:36:48,148 (trainer:732) INFO: 20epoch:train:1701-1800batch: iter_time=1.106e-04, forward_time=0.109, loss_ctc=70.844, loss_att=53.568, acc=0.697, loss=58.751, backward_time=0.755, grad_norm=109.667, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.113, optim0_lr0=8.293e-05, train_time=2.017 +[gpua003:0/64] 2023-07-06 22:38:27,849 (trainer:732) INFO: 20epoch:train:1801-1900batch: iter_time=9.001e-05, forward_time=0.108, loss_ctc=70.234, loss_att=49.729, acc=0.708, loss=55.881, backward_time=0.752, grad_norm=104.621, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.112, optim0_lr0=8.291e-05, train_time=1.994 +[gpua003:0/64] 2023-07-06 22:40:07,934 (trainer:732) INFO: 20epoch:train:1901-2000batch: iter_time=9.188e-05, forward_time=0.109, loss_ctc=74.679, loss_att=53.534, acc=0.678, loss=59.877, backward_time=0.753, grad_norm=99.994, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.113, optim0_lr0=8.288e-05, train_time=2.001 +[gpua003:0/64] 2023-07-06 22:41:47,719 (trainer:732) INFO: 20epoch:train:2001-2100batch: iter_time=9.469e-05, forward_time=0.109, loss_ctc=72.452, loss_att=57.103, acc=0.685, loss=61.708, backward_time=0.753, grad_norm=88.374, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.113, optim0_lr0=8.286e-05, train_time=1.995 +[gpua003:0/64] 2023-07-06 22:43:27,515 (trainer:732) INFO: 20epoch:train:2101-2200batch: iter_time=9.559e-05, forward_time=0.108, loss_ctc=72.276, loss_att=54.565, acc=0.690, loss=59.878, backward_time=0.753, grad_norm=85.377, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.113, optim0_lr0=8.284e-05, train_time=1.996 +[gpua003:0/64] 2023-07-06 22:45:07,265 (trainer:732) INFO: 20epoch:train:2201-2300batch: iter_time=1.133e-04, forward_time=0.109, loss_ctc=70.952, loss_att=53.601, acc=0.697, loss=58.807, backward_time=0.752, grad_norm=91.829, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.113, optim0_lr0=8.282e-05, train_time=1.995 +[gpua003:0/64] 2023-07-06 22:46:46,964 (trainer:732) INFO: 20epoch:train:2301-2400batch: iter_time=1.050e-04, forward_time=0.109, loss_ctc=82.613, loss_att=58.490, acc=0.677, loss=65.727, backward_time=0.751, grad_norm=108.551, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.113, optim0_lr0=8.279e-05, 
train_time=1.994 +[gpua003:0/64] 2023-07-06 22:48:27,085 (multiple_iter_factory:32) INFO: Building 3th iter-factory... +[gpua003:0/64] 2023-07-06 22:48:46,472 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-06 22:48:50,356 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-06 22:48:50,356 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11, +[gpua003:0/64] 2023-07-06 22:48:50,362 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-06 22:53:34,506 (trainer:732) INFO: 20epoch:train:2401-2500batch: iter_time=2.844, forward_time=0.130, loss_ctc=80.783, loss_att=60.279, acc=0.694, loss=66.430, backward_time=0.756, grad_norm=95.454, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.115, optim0_lr0=8.277e-05, train_time=8.151 +[gpua003:0/64] 2023-07-06 22:55:17,165 (trainer:732) INFO: 20epoch:train:2501-2600batch: iter_time=9.896e-05, forward_time=0.113, loss_ctc=70.675, loss_att=49.400, acc=0.713, loss=55.783, backward_time=0.761, grad_norm=88.563, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.113, optim0_lr0=8.275e-05, train_time=2.053 +[gpua003:0/64] 2023-07-06 22:56:57,735 (trainer:732) INFO: 20epoch:train:2601-2700batch: iter_time=9.556e-05, forward_time=0.108, loss_ctc=74.837, loss_att=54.096, acc=0.700, loss=60.318, backward_time=0.756, grad_norm=95.551, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.113, optim0_lr0=8.273e-05, train_time=2.011 +[gpua003:0/64] 2023-07-06 22:58:38,514 (trainer:732) INFO: 20epoch:train:2701-2800batch: iter_time=9.973e-05, forward_time=0.107, loss_ctc=75.135, loss_att=51.859, acc=0.679, loss=58.842, backward_time=0.752, grad_norm=88.078, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.113, optim0_lr0=8.270e-05, train_time=2.015 +[gpua003:0/64] 2023-07-06 23:00:20,949 (trainer:732) INFO: 20epoch:train:2801-2900batch: iter_time=9.541e-05, forward_time=0.108, loss_ctc=69.255, loss_att=55.518, acc=0.700, loss=59.639, backward_time=0.755, grad_norm=80.641, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.113, optim0_lr0=8.268e-05, train_time=2.048 +[gpua003:0/64] 2023-07-06 23:02:14,547 (trainer:732) INFO: 20epoch:train:2901-3000batch: iter_time=9.836e-05, forward_time=0.109, loss_ctc=74.405, loss_att=53.787, acc=0.692, loss=59.972, backward_time=0.782, grad_norm=90.228, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.113, optim0_lr0=8.266e-05, train_time=2.272 +[gpua003:0/64] 2023-07-06 23:03:54,862 (trainer:732) INFO: 20epoch:train:3001-3100batch: iter_time=9.682e-05, forward_time=0.109, loss_ctc=68.569, loss_att=51.720, acc=0.703, loss=56.774, backward_time=0.753, grad_norm=80.182, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.113, optim0_lr0=8.264e-05, train_time=2.006 +[gpua003:0/64] 2023-07-06 23:05:36,742 (trainer:732) INFO: 20epoch:train:3101-3200batch: iter_time=9.416e-05, forward_time=0.110, loss_ctc=81.585, loss_att=57.636, acc=0.695, 
loss=64.820, backward_time=0.756, grad_norm=114.022, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.113, optim0_lr0=8.261e-05, train_time=2.037 +[gpua003:0/64] 2023-07-06 23:07:16,980 (trainer:732) INFO: 20epoch:train:3201-3300batch: iter_time=9.267e-05, forward_time=0.109, loss_ctc=83.278, loss_att=62.301, acc=0.690, loss=68.594, backward_time=0.752, grad_norm=102.383, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.113, optim0_lr0=8.259e-05, train_time=2.005 +[gpua003:0/64] 2023-07-06 23:07:51,559 (multiple_iter_factory:32) INFO: Building 4th iter-factory... +[gpua003:0/64] 2023-07-06 23:08:11,129 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-06 23:08:14,689 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-06 23:08:14,689 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0, +[gpua003:0/64] 2023-07-06 23:08:14,695 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-06 23:13:03,956 (trainer:732) INFO: 20epoch:train:3301-3400batch: iter_time=1.301, forward_time=0.109, loss_ctc=78.861, loss_att=56.676, acc=0.700, loss=63.331, backward_time=0.769, grad_norm=114.958, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.113, optim0_lr0=8.257e-05, train_time=6.939 +[gpua003:0/64] 2023-07-06 23:14:44,012 (trainer:732) INFO: 20epoch:train:3401-3500batch: iter_time=9.664e-05, forward_time=0.108, loss_ctc=68.025, loss_att=50.075, acc=0.699, loss=55.460, backward_time=0.753, grad_norm=84.489, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.113, optim0_lr0=8.255e-05, train_time=2.001 +[gpua003:0/64] 2023-07-06 23:16:27,830 (trainer:732) INFO: 20epoch:train:3501-3600batch: iter_time=9.904e-05, forward_time=0.108, loss_ctc=74.312, loss_att=52.930, acc=0.693, loss=59.345, backward_time=0.765, grad_norm=93.179, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.113, optim0_lr0=8.252e-05, train_time=2.076 +[gpua003:0/64] 2023-07-06 23:18:08,117 (trainer:732) INFO: 20epoch:train:3601-3700batch: iter_time=1.068e-04, forward_time=0.110, loss_ctc=73.489, loss_att=54.029, acc=0.682, loss=59.867, backward_time=0.753, grad_norm=96.418, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.113, optim0_lr0=8.250e-05, train_time=2.006 +[gpua003:0/64] 2023-07-06 23:19:47,962 (trainer:732) INFO: 20epoch:train:3701-3800batch: iter_time=9.700e-05, forward_time=0.107, loss_ctc=67.104, loss_att=51.126, acc=0.699, loss=55.919, backward_time=0.751, grad_norm=85.122, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.113, optim0_lr0=8.248e-05, train_time=1.997 +[gpua003:0/64] 2023-07-06 23:21:27,624 (trainer:732) INFO: 20epoch:train:3801-3900batch: iter_time=1.038e-04, forward_time=0.107, loss_ctc=73.938, loss_att=60.096, acc=0.686, loss=64.249, backward_time=0.751, grad_norm=89.373, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.112, optim0_lr0=8.246e-05, train_time=1.993 +[gpua003:0/64] 2023-07-06 23:23:07,236 (trainer:732) 
INFO: 20epoch:train:3901-4000batch: iter_time=1.016e-04, forward_time=0.108, loss_ctc=72.519, loss_att=51.367, acc=0.692, loss=57.713, backward_time=0.752, grad_norm=97.338, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.113, optim0_lr0=8.243e-05, train_time=1.992 +[gpua003:0/64] 2023-07-06 23:24:46,969 (trainer:732) INFO: 20epoch:train:4001-4100batch: iter_time=9.761e-05, forward_time=0.108, loss_ctc=88.906, loss_att=60.730, acc=0.683, loss=69.183, backward_time=0.751, grad_norm=111.352, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.113, optim0_lr0=8.241e-05, train_time=1.994 +[gpua003:0/64] 2023-07-06 23:25:53,953 (multiple_iter_factory:32) INFO: Building 5th iter-factory... +[gpua003:0/64] 2023-07-06 23:26:13,125 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-06 23:26:16,688 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-06 23:26:16,688 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3, +[gpua003:0/64] 2023-07-06 23:26:16,695 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-06 23:29:48,281 (trainer:732) INFO: 20epoch:train:4101-4200batch: iter_time=1.285, forward_time=0.108, loss_ctc=71.787, loss_att=56.750, acc=0.703, loss=61.261, backward_time=0.763, grad_norm=89.955, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.113, optim0_lr0=8.239e-05, train_time=6.026 +[gpua003:0/64] 2023-07-06 23:31:29,587 (trainer:732) INFO: 20epoch:train:4201-4300batch: iter_time=1.041e-04, forward_time=0.109, loss_ctc=70.768, loss_att=51.030, acc=0.713, loss=56.951, backward_time=0.758, grad_norm=86.739, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.113, optim0_lr0=8.237e-05, train_time=2.026 +[gpua003:0/64] 2023-07-06 23:33:09,537 (trainer:732) INFO: 20epoch:train:4301-4400batch: iter_time=1.101e-04, forward_time=0.108, loss_ctc=69.008, loss_att=48.198, acc=0.717, loss=54.441, backward_time=0.752, grad_norm=98.169, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.113, optim0_lr0=8.234e-05, train_time=1.999 +[gpua003:0/64] 2023-07-06 23:34:49,543 (trainer:732) INFO: 20epoch:train:4401-4500batch: iter_time=1.108e-04, forward_time=0.108, loss_ctc=73.392, loss_att=51.724, acc=0.689, loss=58.225, backward_time=0.753, grad_norm=99.811, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.113, optim0_lr0=8.232e-05, train_time=2.000 +[gpua003:0/64] 2023-07-06 23:36:29,633 (trainer:732) INFO: 20epoch:train:4501-4600batch: iter_time=1.189e-04, forward_time=0.109, loss_ctc=71.304, loss_att=56.277, acc=0.699, loss=60.785, backward_time=0.753, grad_norm=84.933, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.113, optim0_lr0=8.230e-05, train_time=2.002 +[gpua003:0/64] 2023-07-06 23:38:09,605 (trainer:732) INFO: 20epoch:train:4601-4700batch: iter_time=1.208e-04, forward_time=0.108, loss_ctc=70.498, loss_att=54.270, acc=0.697, loss=59.139, backward_time=0.752, grad_norm=83.394, clip=100.000, 
loss_scale=4.612e+18, optim_step_time=0.113, optim0_lr0=8.228e-05, train_time=1.999 +[gpua003:0/64] 2023-07-06 23:39:49,275 (trainer:732) INFO: 20epoch:train:4701-4800batch: iter_time=1.190e-04, forward_time=0.107, loss_ctc=71.120, loss_att=52.173, acc=0.708, loss=57.857, backward_time=0.750, grad_norm=96.519, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.113, optim0_lr0=8.225e-05, train_time=1.993 +[gpua003:0/64] 2023-07-06 23:41:28,953 (trainer:732) INFO: 20epoch:train:4801-4900batch: iter_time=1.090e-04, forward_time=0.108, loss_ctc=81.570, loss_att=58.452, acc=0.689, loss=65.387, backward_time=0.750, grad_norm=95.091, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.113, optim0_lr0=8.223e-05, train_time=1.993 +[gpua003:0/64] 2023-07-06 23:43:09,201 (multiple_iter_factory:32) INFO: Building 6th iter-factory... +[gpua003:0/64] 2023-07-06 23:43:28,281 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-06 23:43:31,799 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-06 23:43:31,799 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4, +[gpua003:0/64] 2023-07-06 23:43:31,806 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-06 23:46:22,528 (trainer:732) INFO: 20epoch:train:4901-5000batch: iter_time=1.282, forward_time=0.108, loss_ctc=79.944, loss_att=59.404, acc=0.708, loss=65.566, backward_time=0.755, grad_norm=106.026, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.113, optim0_lr0=8.221e-05, train_time=5.871 +[gpua003:0/64] 2023-07-06 23:48:04,124 (trainer:732) INFO: 20epoch:train:5001-5100batch: iter_time=1.074e-04, forward_time=0.107, loss_ctc=73.293, loss_att=52.693, acc=0.711, loss=58.873, backward_time=0.757, grad_norm=83.888, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.113, optim0_lr0=8.219e-05, train_time=2.032 +[gpua003:0/64] 2023-07-06 23:49:44,402 (trainer:732) INFO: 20epoch:train:5101-5200batch: iter_time=1.044e-04, forward_time=0.106, loss_ctc=67.920, loss_att=49.443, acc=0.700, loss=54.986, backward_time=0.751, grad_norm=92.376, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.112, optim0_lr0=8.217e-05, train_time=2.005 +[gpua003:0/64] 2023-07-06 23:51:24,233 (trainer:732) INFO: 20epoch:train:5201-5300batch: iter_time=1.049e-04, forward_time=0.106, loss_ctc=77.875, loss_att=55.129, acc=0.680, loss=61.953, backward_time=0.751, grad_norm=110.700, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.112, optim0_lr0=8.214e-05, train_time=1.996 +[gpua003:0/64] 2023-07-06 23:53:04,201 (trainer:732) INFO: 20epoch:train:5301-5400batch: iter_time=1.111e-04, forward_time=0.107, loss_ctc=66.903, loss_att=50.769, acc=0.700, loss=55.609, backward_time=0.751, grad_norm=80.652, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.112, optim0_lr0=8.212e-05, train_time=1.999 +[gpua003:0/64] 2023-07-06 23:54:50,887 (trainer:732) INFO: 20epoch:train:5401-5500batch: iter_time=1.171e-04, 
forward_time=0.107, loss_ctc=73.965, loss_att=54.060, acc=0.695, loss=60.031, backward_time=0.764, grad_norm=92.000, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.112, optim0_lr0=8.210e-05, train_time=2.133 +[gpua003:0/64] 2023-07-06 23:56:33,008 (trainer:732) INFO: 20epoch:train:5501-5600batch: iter_time=1.043e-04, forward_time=0.108, loss_ctc=72.161, loss_att=57.079, acc=0.703, loss=61.603, backward_time=0.754, grad_norm=88.705, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.113, optim0_lr0=8.208e-05, train_time=2.042 +[gpua003:0/64] 2023-07-06 23:58:12,856 (trainer:732) INFO: 20epoch:train:5601-5700batch: iter_time=1.017e-04, forward_time=0.109, loss_ctc=76.027, loss_att=53.040, acc=0.686, loss=59.936, backward_time=0.753, grad_norm=101.954, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.113, optim0_lr0=8.205e-05, train_time=1.997 +[gpua003:0/64] 2023-07-06 23:59:52,835 (trainer:732) INFO: 20epoch:train:5701-5800batch: iter_time=1.008e-04, forward_time=0.109, loss_ctc=83.943, loss_att=63.565, acc=0.695, loss=69.678, backward_time=0.753, grad_norm=152.716, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.114, optim0_lr0=8.203e-05, train_time=1.999 +[gpua003:0/64] 2023-07-07 00:00:26,092 (multiple_iter_factory:32) INFO: Building 7th iter-factory... +[gpua003:0/64] 2023-07-07 00:00:45,216 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-07 00:00:48,757 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-07 00:00:48,757 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1, +[gpua003:0/64] 2023-07-07 00:00:48,764 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-07 00:05:13,212 (trainer:732) INFO: 20epoch:train:5801-5900batch: iter_time=1.287, forward_time=0.108, loss_ctc=69.493, loss_att=50.272, acc=0.713, loss=56.039, backward_time=0.767, grad_norm=90.186, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.113, optim0_lr0=8.201e-05, train_time=6.407 +[gpua003:0/64] 2023-07-07 00:06:53,937 (trainer:732) INFO: 20epoch:train:5901-6000batch: iter_time=9.985e-05, forward_time=0.107, loss_ctc=67.619, loss_att=48.654, acc=0.710, loss=54.344, backward_time=0.755, grad_norm=80.945, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.112, optim0_lr0=8.199e-05, train_time=2.014 +[gpua003:0/64] 2023-07-07 00:08:34,265 (trainer:732) INFO: 20epoch:train:6001-6100batch: iter_time=9.513e-05, forward_time=0.107, loss_ctc=72.368, loss_att=52.436, acc=0.699, loss=58.415, backward_time=0.754, grad_norm=99.544, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.113, optim0_lr0=8.197e-05, train_time=2.006 +[gpua003:0/64] 2023-07-07 00:10:14,094 (trainer:732) INFO: 20epoch:train:6101-6200batch: iter_time=1.063e-04, forward_time=0.107, loss_ctc=72.573, loss_att=53.094, acc=0.693, loss=58.938, backward_time=0.752, grad_norm=96.528, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.113, optim0_lr0=8.194e-05, 
train_time=1.996 +[gpua003:0/64] 2023-07-07 00:11:54,093 (trainer:732) INFO: 20epoch:train:6201-6300batch: iter_time=9.448e-05, forward_time=0.108, loss_ctc=67.484, loss_att=50.808, acc=0.704, loss=55.811, backward_time=0.753, grad_norm=108.855, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.113, optim0_lr0=8.192e-05, train_time=2.000 +[gpua003:0/64] 2023-07-07 00:13:34,349 (trainer:732) INFO: 20epoch:train:6301-6400batch: iter_time=1.004e-04, forward_time=0.107, loss_ctc=72.464, loss_att=56.785, acc=0.702, loss=61.489, backward_time=0.753, grad_norm=87.129, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.113, optim0_lr0=8.190e-05, train_time=2.005 +[gpua003:0/64] 2023-07-07 00:15:14,312 (trainer:732) INFO: 20epoch:train:6401-6500batch: iter_time=9.419e-05, forward_time=0.108, loss_ctc=71.280, loss_att=50.161, acc=0.709, loss=56.497, backward_time=0.754, grad_norm=98.013, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.113, optim0_lr0=8.188e-05, train_time=1.999 +[gpua003:0/64] 2023-07-07 00:16:54,147 (trainer:732) INFO: 20epoch:train:6501-6600batch: iter_time=1.010e-04, forward_time=0.107, loss_ctc=90.171, loss_att=63.851, acc=0.685, loss=71.747, backward_time=0.753, grad_norm=108.463, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.113, optim0_lr0=8.186e-05, train_time=1.996 +[gpua003:0/64] 2023-07-07 00:18:00,433 (multiple_iter_factory:32) INFO: Building 8th iter-factory... +[gpua003:0/64] 2023-07-07 00:18:19,726 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-07 00:18:23,276 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-07 00:18:23,276 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10, +[gpua003:0/64] 2023-07-07 00:18:23,282 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-07 00:22:45,396 (trainer:732) INFO: 20epoch:train:6601-6700batch: iter_time=1.296, forward_time=0.108, loss_ctc=74.059, loss_att=56.550, acc=0.707, loss=61.803, backward_time=0.761, grad_norm=91.757, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.113, optim0_lr0=8.183e-05, train_time=7.025 +[gpua003:0/64] 2023-07-07 00:24:26,019 (trainer:732) INFO: 20epoch:train:6701-6800batch: iter_time=1.007e-04, forward_time=0.108, loss_ctc=69.347, loss_att=50.947, acc=0.699, loss=56.467, backward_time=0.755, grad_norm=89.501, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.113, optim0_lr0=8.181e-05, train_time=2.012 +[gpua003:0/64] 2023-07-07 00:26:06,137 (trainer:732) INFO: 20epoch:train:6801-6900batch: iter_time=1.227e-04, forward_time=0.107, loss_ctc=70.173, loss_att=49.974, acc=0.698, loss=56.034, backward_time=0.751, grad_norm=91.982, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.113, optim0_lr0=8.179e-05, train_time=2.002 +[gpua003:0/64] 2023-07-07 00:27:45,899 (trainer:732) INFO: 20epoch:train:6901-7000batch: iter_time=1.212e-04, forward_time=0.108, loss_ctc=71.799, loss_att=52.784, acc=0.684, 
loss=58.488, backward_time=0.752, grad_norm=105.648, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.113, optim0_lr0=8.177e-05, train_time=1.995 +[gpua003:0/64] 2023-07-07 00:29:25,719 (trainer:732) INFO: 20epoch:train:7001-7100batch: iter_time=1.125e-04, forward_time=0.107, loss_ctc=69.357, loss_att=53.933, acc=0.692, loss=58.560, backward_time=0.752, grad_norm=90.499, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.113, optim0_lr0=8.175e-05, train_time=1.996 +[gpua003:0/64] 2023-07-07 00:31:05,462 (trainer:732) INFO: 20epoch:train:7101-7200batch: iter_time=1.086e-04, forward_time=0.108, loss_ctc=68.020, loss_att=53.832, acc=0.699, loss=58.088, backward_time=0.752, grad_norm=108.903, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.113, optim0_lr0=8.173e-05, train_time=1.995 +[gpua003:0/64] 2023-07-07 00:32:45,164 (trainer:732) INFO: 20epoch:train:7201-7300batch: iter_time=1.177e-04, forward_time=0.108, loss_ctc=71.562, loss_att=52.820, acc=0.698, loss=58.443, backward_time=0.753, grad_norm=92.492, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.113, optim0_lr0=8.170e-05, train_time=1.994 +[gpua003:0/64] 2023-07-07 00:34:25,792 (trainer:732) INFO: 20epoch:train:7301-7400batch: iter_time=1.149e-04, forward_time=0.108, loss_ctc=84.374, loss_att=58.985, acc=0.686, loss=66.602, backward_time=0.752, grad_norm=103.222, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.112, optim0_lr0=8.168e-05, train_time=2.012 +[gpua003:0/64] 2023-07-07 00:36:05,793 (multiple_iter_factory:32) INFO: Building 9th iter-factory... +[gpua003:0/64] 2023-07-07 00:36:24,910 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-07 00:36:28,439 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-07 00:36:28,439 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9, +[gpua003:0/64] 2023-07-07 00:36:28,446 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-07 00:39:39,875 (trainer:732) INFO: 20epoch:train:7401-7500batch: iter_time=1.300, forward_time=0.108, loss_ctc=82.541, loss_att=62.936, acc=0.691, loss=68.817, backward_time=0.758, grad_norm=115.515, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.112, optim0_lr0=8.166e-05, train_time=6.281 +[gpua003:0/64] 2023-07-07 00:41:23,429 (trainer:732) INFO: 20epoch:train:7501-7600batch: iter_time=9.637e-05, forward_time=0.109, loss_ctc=69.225, loss_att=48.998, acc=0.720, loss=55.066, backward_time=0.763, grad_norm=96.997, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.113, optim0_lr0=8.164e-05, train_time=2.071 +[gpua003:0/64] 2023-07-07 00:43:03,727 (trainer:732) INFO: 20epoch:train:7601-7700batch: iter_time=1.074e-04, forward_time=0.107, loss_ctc=74.248, loss_att=54.779, acc=0.704, loss=60.620, backward_time=0.752, grad_norm=92.841, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.112, optim0_lr0=8.162e-05, train_time=2.006 +[gpua003:0/64] 2023-07-07 00:44:44,287 (trainer:732) 
INFO: 20epoch:train:7701-7800batch: iter_time=1.079e-04, forward_time=0.107, loss_ctc=73.724, loss_att=51.407, acc=0.683, loss=58.102, backward_time=0.753, grad_norm=97.431, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.112, optim0_lr0=8.159e-05, train_time=2.011 +[gpua003:0/64] 2023-07-07 00:46:24,185 (trainer:732) INFO: 20epoch:train:7801-7900batch: iter_time=9.605e-05, forward_time=0.108, loss_ctc=69.983, loss_att=56.359, acc=0.700, loss=60.446, backward_time=0.753, grad_norm=78.300, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.113, optim0_lr0=8.157e-05, train_time=1.998 +[gpua003:0/64] 2023-07-07 00:48:04,263 (trainer:732) INFO: 20epoch:train:7901-8000batch: iter_time=9.780e-05, forward_time=0.109, loss_ctc=71.676, loss_att=53.364, acc=0.698, loss=58.857, backward_time=0.754, grad_norm=93.587, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.113, optim0_lr0=8.155e-05, train_time=2.001 +[gpua003:0/64] 2023-07-07 00:49:45,962 (trainer:732) INFO: 20epoch:train:8001-8100batch: iter_time=9.342e-05, forward_time=0.108, loss_ctc=67.152, loss_att=50.735, acc=0.707, loss=55.660, backward_time=0.754, grad_norm=96.944, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.113, optim0_lr0=8.153e-05, train_time=2.034 +[gpua003:0/64] 2023-07-07 00:51:26,537 (trainer:732) INFO: 20epoch:train:8101-8200batch: iter_time=9.864e-05, forward_time=0.108, loss_ctc=81.176, loss_att=58.498, acc=0.696, loss=65.302, backward_time=0.753, grad_norm=108.921, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.113, optim0_lr0=8.151e-05, train_time=2.011 +[gpua003:0/64] 2023-07-07 00:53:07,235 (trainer:732) INFO: 20epoch:train:8201-8300batch: iter_time=1.006e-04, forward_time=0.108, loss_ctc=79.556, loss_att=61.957, acc=0.691, loss=67.237, backward_time=0.752, grad_norm=109.467, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.113, optim0_lr0=8.149e-05, train_time=2.014 +[gpua003:0/64] 2023-07-07 00:53:41,518 (multiple_iter_factory:32) INFO: Building 10th iter-factory... 
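Note on the loss columns in these progress lines: the reported loss is consistent with a hybrid CTC/attention objective, loss = w * loss_ctc + (1 - w) * loss_att with w = 0.3 (for example, the 7701-7800batch entry above: 0.3 * 73.724 + 0.7 * 51.407 = 58.102). A minimal sketch of that bookkeeping, assuming w = 0.3 -- the weight is inferred from the logged values, since the training YAML itself is not reproduced in this log:

# Minimal sketch: reproduce the "loss" column from "loss_ctc" and "loss_att".
# ctc_weight = 0.3 is an assumption inferred from the logged values above,
# not read from conf/train_s2t_transformer_*.yaml.
ctc_weight = 0.3

def combined_loss(loss_ctc: float, loss_att: float, w: float = ctc_weight) -> float:
    # Hybrid CTC/attention objective: weighted sum of the two branch losses.
    return w * loss_ctc + (1.0 - w) * loss_att

# Checks against two entries logged above (agreement to the 3 printed decimals):
assert abs(combined_loss(73.724, 51.407) - 58.102) < 5e-3
assert abs(combined_loss(74.248, 54.779) - 60.620) < 5e-3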
+[gpua003:0/64] 2023-07-07 00:54:00,863 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-07 00:54:04,680 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-07 00:54:04,680 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8, +[gpua003:0/64] 2023-07-07 00:54:04,686 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-07 00:58:25,770 (trainer:732) INFO: 20epoch:train:8301-8400batch: iter_time=1.273, forward_time=0.108, loss_ctc=78.310, loss_att=60.461, acc=0.698, loss=65.816, backward_time=0.767, grad_norm=117.559, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.113, optim0_lr0=8.146e-05, train_time=6.370 +[gpua003:0/64] 2023-07-07 01:00:07,312 (trainer:732) INFO: 20epoch:train:8401-8500batch: iter_time=1.152e-04, forward_time=0.108, loss_ctc=67.962, loss_att=49.896, acc=0.703, loss=55.316, backward_time=0.754, grad_norm=90.877, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.114, optim0_lr0=8.144e-05, train_time=2.031 +[gpua003:0/64] 2023-07-07 01:01:48,019 (trainer:732) INFO: 20epoch:train:8501-8600batch: iter_time=1.031e-04, forward_time=0.110, loss_ctc=68.584, loss_att=49.176, acc=0.698, loss=54.998, backward_time=0.755, grad_norm=93.992, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.113, optim0_lr0=8.142e-05, train_time=2.014 +[gpua003:0/64] 2023-07-07 01:03:36,281 (trainer:732) INFO: 20epoch:train:8601-8700batch: iter_time=9.241e-05, forward_time=0.108, loss_ctc=75.435, loss_att=53.797, acc=0.684, loss=60.288, backward_time=0.759, grad_norm=99.257, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.113, optim0_lr0=8.140e-05, train_time=2.165 +[gpua003:0/64] 2023-07-07 01:05:22,596 (trainer:732) INFO: 20epoch:train:8701-8800batch: iter_time=1.007e-04, forward_time=0.108, loss_ctc=69.540, loss_att=53.032, acc=0.705, loss=57.984, backward_time=0.759, grad_norm=88.332, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.113, optim0_lr0=8.138e-05, train_time=2.126 +[gpua003:0/64] 2023-07-07 01:07:02,503 (trainer:732) INFO: 20epoch:train:8801-8900batch: iter_time=1.033e-04, forward_time=0.108, loss_ctc=71.565, loss_att=57.189, acc=0.691, loss=61.502, backward_time=0.751, grad_norm=90.049, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.113, optim0_lr0=8.136e-05, train_time=1.998 +[gpua003:0/64] 2023-07-07 01:08:47,374 (trainer:732) INFO: 20epoch:train:8901-9000batch: iter_time=9.529e-05, forward_time=0.108, loss_ctc=74.747, loss_att=50.418, acc=0.695, loss=57.717, backward_time=0.757, grad_norm=98.679, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.112, optim0_lr0=8.134e-05, train_time=2.097 +[gpua003:0/64] 2023-07-07 01:10:27,736 (trainer:732) INFO: 20epoch:train:9001-9100batch: iter_time=9.856e-05, forward_time=0.107, loss_ctc=83.548, loss_att=63.248, acc=0.685, loss=69.338, backward_time=0.751, grad_norm=111.462, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.112, optim0_lr0=8.131e-05, 
train_time=2.007 +[gpua003:0/64] 2023-07-07 01:11:37,029 (multiple_iter_factory:32) INFO: Building 11th iter-factory... +[gpua003:0/64] 2023-07-07 01:11:56,346 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-07 01:12:00,135 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-07 01:12:00,135 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2, +[gpua003:0/64] 2023-07-07 01:12:00,141 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-07 01:16:34,776 (trainer:732) INFO: 20epoch:train:9101-9200batch: iter_time=1.308, forward_time=0.108, loss_ctc=72.185, loss_att=56.405, acc=0.695, loss=61.139, backward_time=0.773, grad_norm=115.543, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.112, optim0_lr0=8.129e-05, train_time=7.341 +[gpua003:0/64] 2023-07-07 01:18:15,925 (trainer:732) INFO: 20epoch:train:9201-9300batch: iter_time=9.792e-05, forward_time=0.106, loss_ctc=71.745, loss_att=52.813, acc=0.704, loss=58.492, backward_time=0.754, grad_norm=89.778, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.112, optim0_lr0=8.127e-05, train_time=2.023 +[gpua003:0/64] 2023-07-07 01:19:59,197 (trainer:732) INFO: 20epoch:train:9301-9400batch: iter_time=9.737e-05, forward_time=0.107, loss_ctc=69.353, loss_att=48.395, acc=0.713, loss=54.682, backward_time=0.753, grad_norm=88.881, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.112, optim0_lr0=8.125e-05, train_time=2.065 +[gpua003:0/64] 2023-07-07 01:21:40,857 (trainer:732) INFO: 20epoch:train:9401-9500batch: iter_time=1.001e-04, forward_time=0.107, loss_ctc=72.030, loss_att=51.469, acc=0.684, loss=57.637, backward_time=0.753, grad_norm=97.051, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.112, optim0_lr0=8.123e-05, train_time=2.033 +[gpua003:0/64] 2023-07-07 01:23:21,684 (trainer:732) INFO: 20epoch:train:9501-9600batch: iter_time=9.299e-05, forward_time=0.107, loss_ctc=71.994, loss_att=56.267, acc=0.690, loss=60.985, backward_time=0.753, grad_norm=91.397, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.112, optim0_lr0=8.121e-05, train_time=2.016 +[gpua003:0/64] 2023-07-07 01:25:01,632 (trainer:732) INFO: 20epoch:train:9601-9700batch: iter_time=1.032e-04, forward_time=0.108, loss_ctc=70.203, loss_att=53.311, acc=0.696, loss=58.379, backward_time=0.753, grad_norm=104.136, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.112, optim0_lr0=8.118e-05, train_time=1.999 +[gpua003:0/64] 2023-07-07 01:26:41,518 (trainer:732) INFO: 20epoch:train:9701-9800batch: iter_time=8.494e-05, forward_time=0.107, loss_ctc=70.139, loss_att=51.677, acc=0.703, loss=57.216, backward_time=0.753, grad_norm=87.042, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.113, optim0_lr0=8.116e-05, train_time=1.998 +[gpua003:0/64] 2023-07-07 01:28:24,627 (trainer:732) INFO: 20epoch:train:9801-9900batch: iter_time=9.340e-05, forward_time=0.107, loss_ctc=82.680, loss_att=58.302, acc=0.679, loss=65.616, 
backward_time=0.763, grad_norm=95.875, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.113, optim0_lr0=8.114e-05, train_time=2.062 +[gpua003:0/64] 2023-07-07 01:30:09,616 (trainer:732) INFO: 20epoch:train:9901-10000batch: iter_time=9.389e-05, forward_time=0.107, loss_ctc=77.669, loss_att=57.909, acc=0.701, loss=63.837, backward_time=0.757, grad_norm=107.681, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.113, optim0_lr0=8.112e-05, train_time=2.100 +[gpua003:0/64] 2023-07-07 01:42:21,427 (trainer:338) INFO: 20epoch results: [train] iter_time=0.196, forward_time=0.108, loss_ctc=73.961, loss_att=54.526, acc=0.696, loss=60.357, backward_time=0.755, grad_norm=96.055, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.113, optim0_lr0=8.221e-05, train_time=2.596, time=3 hours, 36 minutes and 32.76 seconds, total_count=170000, gpu_max_cached_mem_GB=37.779, [valid] loss_ctc=49.387, cer_ctc=0.283, loss_att=42.558, acc=0.643, cer=0.424, wer=0.999, loss=44.607, time=5 minutes and 50.92 seconds, total_count=17710, gpu_max_cached_mem_GB=37.779, [att_plot] time=6 minutes and 11.77 seconds, total_count=0, gpu_max_cached_mem_GB=37.779 +[gpua003:0/64] 2023-07-07 01:42:37,043 (trainer:386) INFO: The best model has been updated: valid.total_count +[gpua003:0/64] 2023-07-07 01:42:37,073 (average_nbest_models:69) INFO: Averaging 5best models: criterion="valid.acc": exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/valid.acc.ave_5best.till20epoch.pth +[gpua003:0/64] 2023-07-07 01:43:32,066 (average_nbest_models:69) INFO: Averaging 5best models: criterion="valid.total_count": exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/valid.total_count.ave_5best.till20epoch.pth +[gpua003:0/64] 2023-07-07 01:43:57,407 (trainer:272) INFO: 21/100epoch started. Estimated time to finish: 1 week, 5 days and 21 hours +[gpua003:0/64] 2023-07-07 01:43:58,913 (multiple_iter_factory:32) INFO: Building 0th iter-factory... 
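The two "Averaging 5best models" lines above produce valid.acc.ave_5best.till20epoch.pth and valid.total_count.ave_5best.till20epoch.pth by averaging the parameters of the five best checkpoints under each criterion. A minimal sketch of that operation, assuming plain PyTorch state dicts (illustrative only, not ESPnet's exact average_nbest_models implementation):

import torch

def average_checkpoints(paths):
    # Accumulate floating-point tensors in double precision; keep integer
    # buffers (e.g. step counters) from the first checkpoint as-is.
    avg, dtypes, n = None, {}, len(paths)
    for p in paths:
        state = torch.load(p, map_location="cpu")
        if avg is None:
            dtypes = {k: v.dtype for k, v in state.items()}
            avg = {k: v.double().clone() if v.is_floating_point() else v.clone()
                   for k, v in state.items()}
        else:
            for k, v in state.items():
                if avg[k].is_floating_point():
                    avg[k] += v.double()
    return {k: (v / n).to(dtypes[k]) if v.is_floating_point() else v
            for k, v in avg.items()}

# e.g. (five_best_paths chosen by valid.acc, as in the log line above):
# torch.save(average_checkpoints(five_best_paths),
#            "exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/valid.acc.ave_5best.till20epoch.pth")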
+[gpua003:0/64] 2023-07-07 01:44:18,582 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-07 01:44:24,346 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-07 01:44:24,346 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3, +[gpua003:0/64] 2023-07-07 01:44:24,417 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-07 01:50:53,031 (trainer:732) INFO: 21epoch:train:1-100batch: iter_time=3.069, forward_time=0.137, loss_ctc=75.425, loss_att=60.964, acc=0.696, loss=65.302, backward_time=0.770, grad_norm=105.263, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.117, optim0_lr0=8.110e-05, train_time=8.297 +[gpua003:0/64] 2023-07-07 01:52:34,428 (trainer:732) INFO: 21epoch:train:101-200batch: iter_time=1.033e-04, forward_time=0.109, loss_ctc=66.860, loss_att=53.856, acc=0.695, loss=57.757, backward_time=0.754, grad_norm=87.309, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.113, optim0_lr0=8.108e-05, train_time=2.028 +[gpua003:0/64] 2023-07-07 01:54:16,169 (trainer:732) INFO: 21epoch:train:201-300batch: iter_time=1.017e-04, forward_time=0.108, loss_ctc=68.585, loss_att=49.706, acc=0.718, loss=55.369, backward_time=0.751, grad_norm=89.606, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.113, optim0_lr0=8.106e-05, train_time=2.035 +[gpua003:0/64] 2023-07-07 01:55:56,901 (trainer:732) INFO: 21epoch:train:301-400batch: iter_time=9.549e-05, forward_time=0.109, loss_ctc=83.868, loss_att=59.563, acc=0.677, loss=66.854, backward_time=0.751, grad_norm=118.007, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.113, optim0_lr0=8.104e-05, train_time=2.014 +[gpua003:0/64] 2023-07-07 01:57:38,518 (trainer:732) INFO: 21epoch:train:401-500batch: iter_time=8.947e-05, forward_time=0.108, loss_ctc=75.494, loss_att=55.952, acc=0.710, loss=61.815, backward_time=0.753, grad_norm=96.012, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.113, optim0_lr0=8.101e-05, train_time=2.032 +[gpua003:0/64] 2023-07-07 01:59:25,089 (trainer:732) INFO: 21epoch:train:501-600batch: iter_time=8.360e-05, forward_time=0.107, loss_ctc=77.597, loss_att=58.124, acc=0.678, loss=63.966, backward_time=0.760, grad_norm=95.102, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.113, optim0_lr0=8.099e-05, train_time=2.131 +[gpua003:0/64] 2023-07-07 02:01:14,358 (trainer:732) INFO: 21epoch:train:601-700batch: iter_time=8.784e-05, forward_time=0.108, loss_ctc=79.901, loss_att=58.953, acc=0.688, loss=65.237, backward_time=0.766, grad_norm=109.677, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.113, optim0_lr0=8.097e-05, train_time=2.185 +[gpua003:0/64] 2023-07-07 02:03:06,286 (trainer:732) INFO: 21epoch:train:701-800batch: iter_time=8.960e-05, forward_time=0.108, loss_ctc=76.140, loss_att=52.512, acc=0.702, loss=59.600, backward_time=0.764, grad_norm=108.446, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.113, optim0_lr0=8.095e-05, train_time=2.238 
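These trainer:732 progress lines are regular enough to post-process for plotting. A small stand-alone helper (a convenience script, not part of ESPnet; the field names and their order match the lines in this log):

import re

# One 100-batch progress record, e.g.:
#   "... INFO: 21epoch:train:1-100batch: iter_time=3.069, ... loss_ctc=75.425,
#    loss_att=60.964, acc=0.696, loss=65.302, ... optim0_lr0=8.110e-05, ..."
PAT = re.compile(
    r"(?P<epoch>\d+)epoch:train:(?P<batches>\d+-\d+)batch:.*?"
    r"loss_ctc=(?P<loss_ctc>[\d.]+), loss_att=(?P<loss_att>[\d.]+), "
    r"acc=(?P<acc>[\d.]+), loss=(?P<loss>[\d.]+).*?"
    r"optim0_lr0=(?P<lr>[\d.e+-]+)"
)

def parse_trainer_lines(path):
    # Yield one dict of metrics per matching log line.
    with open(path, errors="replace") as f:
        for line in f:
            m = PAT.search(line)
            if m:
                yield {k: (v if k == "batches" else float(v))
                       for k, v in m.groupdict().items()}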
+[gpua003:0/64] 2023-07-07 02:03:45,470 (multiple_iter_factory:32) INFO: Building 1th iter-factory... +[gpua003:0/64] 2023-07-07 02:04:04,788 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-07 02:04:08,666 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-07 02:04:08,666 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6, +[gpua003:0/64] 2023-07-07 02:04:08,672 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-07 02:08:05,629 (trainer:732) INFO: 21epoch:train:801-900batch: iter_time=1.390, forward_time=0.152, loss_ctc=78.626, loss_att=63.039, acc=0.675, loss=67.715, backward_time=0.773, grad_norm=107.084, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.115, optim0_lr0=8.093e-05, train_time=5.987 +[gpua003:0/64] 2023-07-07 02:09:45,991 (trainer:732) INFO: 21epoch:train:901-1000batch: iter_time=9.686e-05, forward_time=0.109, loss_ctc=65.698, loss_att=52.821, acc=0.694, loss=56.684, backward_time=0.754, grad_norm=87.102, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.113, optim0_lr0=8.091e-05, train_time=2.007 +[gpua003:0/64] 2023-07-07 02:11:25,991 (trainer:732) INFO: 21epoch:train:1001-1100batch: iter_time=1.035e-04, forward_time=0.108, loss_ctc=70.095, loss_att=51.641, acc=0.713, loss=57.177, backward_time=0.751, grad_norm=85.639, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.113, optim0_lr0=8.089e-05, train_time=2.000 +[gpua003:0/64] 2023-07-07 02:13:05,844 (trainer:732) INFO: 21epoch:train:1101-1200batch: iter_time=1.030e-04, forward_time=0.108, loss_ctc=71.874, loss_att=52.739, acc=0.696, loss=58.480, backward_time=0.752, grad_norm=119.352, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.113, optim0_lr0=8.087e-05, train_time=1.997 +[gpua003:0/64] 2023-07-07 02:14:45,543 (trainer:732) INFO: 21epoch:train:1201-1300batch: iter_time=9.500e-05, forward_time=0.108, loss_ctc=79.532, loss_att=61.570, acc=0.679, loss=66.959, backward_time=0.750, grad_norm=83.751, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.114, optim0_lr0=8.084e-05, train_time=1.994 +[gpua003:0/64] 2023-07-07 02:16:25,354 (trainer:732) INFO: 21epoch:train:1301-1400batch: iter_time=9.425e-05, forward_time=0.108, loss_ctc=70.489, loss_att=51.580, acc=0.691, loss=57.252, backward_time=0.751, grad_norm=89.038, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.113, optim0_lr0=8.082e-05, train_time=1.996 +[gpua003:0/64] 2023-07-07 02:18:05,983 (trainer:732) INFO: 21epoch:train:1401-1500batch: iter_time=9.711e-05, forward_time=0.109, loss_ctc=79.242, loss_att=59.276, acc=0.681, loss=65.266, backward_time=0.754, grad_norm=99.915, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.113, optim0_lr0=8.080e-05, train_time=2.012 +[gpua003:0/64] 2023-07-07 02:19:45,694 (trainer:732) INFO: 21epoch:train:1501-1600batch: iter_time=1.032e-04, forward_time=0.107, loss_ctc=86.448, loss_att=59.634, acc=0.687, loss=67.678, backward_time=0.752, 
grad_norm=119.579, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.113, optim0_lr0=8.078e-05, train_time=1.994 +[gpua003:0/64] 2023-07-07 02:21:03,476 (multiple_iter_factory:32) INFO: Building 2th iter-factory... +[gpua003:0/64] 2023-07-07 02:21:22,929 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-07 02:21:26,897 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-07 02:21:26,897 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5, +[gpua003:0/64] 2023-07-07 02:21:26,904 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-07 02:26:35,736 (trainer:732) INFO: 21epoch:train:1601-1700batch: iter_time=3.035, forward_time=0.137, loss_ctc=74.922, loss_att=57.634, acc=0.681, loss=62.820, backward_time=0.764, grad_norm=103.929, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.115, optim0_lr0=8.076e-05, train_time=8.200 +[gpua003:0/64] 2023-07-07 02:28:16,442 (trainer:732) INFO: 21epoch:train:1701-1800batch: iter_time=9.394e-05, forward_time=0.110, loss_ctc=67.692, loss_att=51.540, acc=0.720, loss=56.386, backward_time=0.754, grad_norm=84.127, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.112, optim0_lr0=8.074e-05, train_time=2.015 +[gpua003:0/64] 2023-07-07 02:29:56,377 (trainer:732) INFO: 21epoch:train:1801-1900batch: iter_time=9.295e-05, forward_time=0.109, loss_ctc=67.942, loss_att=54.566, acc=0.708, loss=58.578, backward_time=0.753, grad_norm=95.960, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.112, optim0_lr0=8.072e-05, train_time=1.998 +[gpua003:0/64] 2023-07-07 02:31:36,002 (trainer:732) INFO: 21epoch:train:1901-2000batch: iter_time=8.542e-05, forward_time=0.108, loss_ctc=62.560, loss_att=45.274, acc=0.709, loss=50.460, backward_time=0.750, grad_norm=78.689, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.112, optim0_lr0=8.070e-05, train_time=1.992 +[gpua003:0/64] 2023-07-07 02:33:15,755 (trainer:732) INFO: 21epoch:train:2001-2100batch: iter_time=8.779e-05, forward_time=0.108, loss_ctc=83.888, loss_att=62.880, acc=0.695, loss=69.182, backward_time=0.752, grad_norm=111.465, clip=100.000, loss_scale=1.845e+19, optim_step_time=0.112, optim0_lr0=8.068e-05, train_time=1.995 +[gpua003:0/64] 2023-07-07 02:34:55,601 (trainer:732) INFO: 21epoch:train:2101-2200batch: iter_time=9.295e-05, forward_time=0.108, loss_ctc=71.779, loss_att=53.269, acc=0.708, loss=58.822, backward_time=0.753, grad_norm=78.300, clip=100.000, loss_scale=1.845e+19, optim_step_time=0.112, optim0_lr0=8.066e-05, train_time=1.997 +[gpua003:0/64] 2023-07-07 02:36:35,519 (trainer:732) INFO: 21epoch:train:2201-2300batch: iter_time=8.719e-05, forward_time=0.109, loss_ctc=77.907, loss_att=57.920, acc=0.681, loss=63.916, backward_time=0.753, grad_norm=94.141, clip=100.000, loss_scale=1.845e+19, optim_step_time=0.112, optim0_lr0=8.063e-05, train_time=1.998 +[gpua003:0/64] 2023-07-07 02:38:15,578 (trainer:732) INFO: 21epoch:train:2301-2400batch: 
iter_time=1.077e-04, forward_time=0.109, loss_ctc=77.449, loss_att=56.410, acc=0.699, loss=62.722, backward_time=0.751, grad_norm=114.357, clip=100.000, loss_scale=1.845e+19, optim_step_time=0.112, optim0_lr0=8.061e-05, train_time=2.001 +[gpua003:0/64] 2023-07-07 02:40:15,411 (multiple_iter_factory:32) INFO: Building 3th iter-factory... +[gpua003:0/64] 2023-07-07 02:40:34,895 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-07 02:40:38,734 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-07 02:40:38,734 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0, +[gpua003:0/64] 2023-07-07 02:40:38,741 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-07 02:43:57,355 (trainer:732) INFO: 21epoch:train:2401-2500batch: iter_time=1.755, forward_time=0.116, loss_ctc=73.960, loss_att=51.538, acc=0.693, loss=58.265, backward_time=0.768, grad_norm=101.777, clip=100.000, loss_scale=1.845e+19, optim_step_time=0.112, optim0_lr0=8.059e-05, train_time=6.835 +[gpua003:0/64] 2023-07-07 02:45:38,947 (trainer:732) INFO: 21epoch:train:2501-2600batch: iter_time=1.017e-04, forward_time=0.107, loss_ctc=73.796, loss_att=60.954, acc=0.696, loss=64.807, backward_time=0.760, grad_norm=102.406, clip=100.000, loss_scale=1.845e+19, optim_step_time=0.112, optim0_lr0=8.057e-05, train_time=2.032 +[gpua003:0/64] 2023-07-07 02:47:21,878 (trainer:732) INFO: 21epoch:train:2601-2700batch: iter_time=1.022e-04, forward_time=0.107, loss_ctc=65.924, loss_att=51.785, acc=0.701, loss=56.026, backward_time=0.752, grad_norm=94.190, clip=100.000, loss_scale=1.845e+19, optim_step_time=0.112, optim0_lr0=8.055e-05, train_time=2.058 +[gpua003:0/64] 2023-07-07 02:49:01,878 (trainer:732) INFO: 21epoch:train:2701-2800batch: iter_time=1.006e-04, forward_time=0.107, loss_ctc=68.630, loss_att=49.530, acc=0.716, loss=55.260, backward_time=0.752, grad_norm=96.790, clip=100.000, loss_scale=1.845e+19, optim_step_time=0.112, optim0_lr0=8.053e-05, train_time=2.000 +[gpua003:0/64] 2023-07-07 02:50:43,603 (trainer:732) INFO: 21epoch:train:2801-2900batch: iter_time=1.010e-04, forward_time=0.109, loss_ctc=78.316, loss_att=57.443, acc=0.680, loss=63.705, backward_time=0.757, grad_norm=110.859, clip=100.000, loss_scale=1.845e+19, optim_step_time=0.112, optim0_lr0=8.051e-05, train_time=2.034 +[gpua003:0/64] 2023-07-07 02:52:23,596 (trainer:732) INFO: 21epoch:train:2901-3000batch: iter_time=9.699e-05, forward_time=0.110, loss_ctc=74.531, loss_att=55.316, acc=0.713, loss=61.081, backward_time=0.754, grad_norm=91.694, clip=100.000, loss_scale=1.845e+19, optim_step_time=0.113, optim0_lr0=8.049e-05, train_time=2.000 +[gpua003:0/64] 2023-07-07 02:54:03,277 (trainer:732) INFO: 21epoch:train:3001-3100batch: iter_time=1.094e-04, forward_time=0.108, loss_ctc=72.826, loss_att=52.947, acc=0.686, loss=58.911, backward_time=0.751, grad_norm=96.991, clip=100.000, loss_scale=1.845e+19, optim_step_time=0.112, 
optim0_lr0=8.047e-05, train_time=1.993 +[gpua003:0/64] 2023-07-07 02:55:45,673 (trainer:732) INFO: 21epoch:train:3101-3200batch: iter_time=1.044e-04, forward_time=0.128, loss_ctc=79.741, loss_att=59.681, acc=0.688, loss=65.699, backward_time=0.753, grad_norm=105.684, clip=100.000, loss_scale=1.845e+19, optim_step_time=0.114, optim0_lr0=8.045e-05, train_time=2.048 +[gpua003:0/64] 2023-07-07 02:57:26,045 (trainer:732) INFO: 21epoch:train:3201-3300batch: iter_time=4.703e-04, forward_time=0.110, loss_ctc=72.892, loss_att=50.604, acc=0.704, loss=57.290, backward_time=0.750, grad_norm=98.215, clip=100.000, loss_scale=1.845e+19, optim_step_time=0.113, optim0_lr0=8.043e-05, train_time=2.002 +[gpua003:0/64] 2023-07-07 02:57:59,564 (multiple_iter_factory:32) INFO: Building 4th iter-factory... +[gpua003:0/64] 2023-07-07 02:58:19,306 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-07 02:58:23,210 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-07 02:58:23,210 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8, +[gpua003:0/64] 2023-07-07 02:58:23,216 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-07 03:04:26,311 (trainer:732) INFO: 21epoch:train:3301-3400batch: iter_time=1.411, forward_time=0.113, loss_ctc=70.531, loss_att=53.598, acc=0.698, loss=58.678, backward_time=0.765, grad_norm=95.913, clip=100.000, loss_scale=1.845e+19, optim_step_time=0.114, optim0_lr0=8.040e-05, train_time=8.405 +[gpua003:0/64] 2023-07-07 03:06:06,849 (trainer:732) INFO: 21epoch:train:3401-3500batch: iter_time=1.204e-04, forward_time=0.109, loss_ctc=66.577, loss_att=55.002, acc=0.685, loss=58.474, backward_time=0.753, grad_norm=97.139, clip=100.000, loss_scale=1.845e+19, optim_step_time=0.112, optim0_lr0=8.038e-05, train_time=2.011 +[gpua003:0/64] 2023-07-07 03:07:46,882 (trainer:732) INFO: 21epoch:train:3501-3600batch: iter_time=1.055e-04, forward_time=0.108, loss_ctc=68.566, loss_att=48.282, acc=0.717, loss=54.367, backward_time=0.754, grad_norm=84.774, clip=100.000, loss_scale=1.845e+19, optim_step_time=0.112, optim0_lr0=8.036e-05, train_time=2.000 +[gpua003:0/64] 2023-07-07 03:09:26,695 (trainer:732) INFO: 21epoch:train:3601-3700batch: iter_time=1.129e-04, forward_time=0.109, loss_ctc=76.925, loss_att=57.566, acc=0.683, loss=63.374, backward_time=0.753, grad_norm=117.953, clip=100.000, loss_scale=1.845e+19, optim_step_time=0.112, optim0_lr0=8.034e-05, train_time=1.996 +[gpua003:0/64] 2023-07-07 03:11:06,310 (trainer:732) INFO: 21epoch:train:3701-3800batch: iter_time=1.180e-04, forward_time=0.107, loss_ctc=70.698, loss_att=53.316, acc=0.704, loss=58.530, backward_time=0.751, grad_norm=94.370, clip=100.000, loss_scale=1.845e+19, optim_step_time=0.112, optim0_lr0=8.032e-05, train_time=1.992 +[gpua003:0/64] 2023-07-07 03:12:46,196 (trainer:732) INFO: 21epoch:train:3801-3900batch: iter_time=1.218e-04, forward_time=0.109, loss_ctc=74.676, loss_att=54.150, 
acc=0.689, loss=60.307, backward_time=0.752, grad_norm=84.395, clip=100.000, loss_scale=1.845e+19, optim_step_time=0.112, optim0_lr0=8.030e-05, train_time=1.997 +[gpua003:0/64] 2023-07-07 03:14:25,952 (trainer:732) INFO: 21epoch:train:3901-4000batch: iter_time=9.952e-05, forward_time=0.107, loss_ctc=79.508, loss_att=60.374, acc=0.682, loss=66.114, backward_time=0.752, grad_norm=109.277, clip=100.000, loss_scale=1.845e+19, optim_step_time=0.112, optim0_lr0=8.028e-05, train_time=1.995 +[gpua003:0/64] 2023-07-07 03:16:05,888 (trainer:732) INFO: 21epoch:train:4001-4100batch: iter_time=8.976e-05, forward_time=0.108, loss_ctc=76.068, loss_att=53.950, acc=0.697, loss=60.586, backward_time=0.754, grad_norm=109.443, clip=100.000, loss_scale=1.845e+19, optim_step_time=0.112, optim0_lr0=8.026e-05, train_time=1.999 +[gpua003:0/64] 2023-07-07 03:17:13,032 (multiple_iter_factory:32) INFO: Building 5th iter-factory... +[gpua003:0/64] 2023-07-07 03:17:32,119 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-07 03:17:35,672 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-07 03:17:35,672 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7, +[gpua003:0/64] 2023-07-07 03:17:35,678 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-07 03:21:24,869 (trainer:732) INFO: 21epoch:train:4101-4200batch: iter_time=2.108, forward_time=0.163, loss_ctc=73.138, loss_att=59.185, acc=0.677, loss=63.371, backward_time=0.766, grad_norm=100.088, clip=100.000, loss_scale=1.845e+19, optim_step_time=0.115, optim0_lr0=8.024e-05, train_time=6.378 +[gpua003:0/64] 2023-07-07 03:23:05,523 (trainer:732) INFO: 21epoch:train:4201-4300batch: iter_time=9.623e-05, forward_time=0.109, loss_ctc=67.172, loss_att=52.075, acc=0.720, loss=56.604, backward_time=0.755, grad_norm=80.071, clip=100.000, loss_scale=1.845e+19, optim_step_time=0.114, optim0_lr0=8.022e-05, train_time=2.014 +[gpua003:0/64] 2023-07-07 03:24:45,805 (trainer:732) INFO: 21epoch:train:4301-4400batch: iter_time=1.166e-04, forward_time=0.111, loss_ctc=66.732, loss_att=52.975, acc=0.711, loss=57.102, backward_time=0.754, grad_norm=109.947, clip=100.000, loss_scale=1.845e+19, optim_step_time=0.114, optim0_lr0=8.020e-05, train_time=2.005 +[gpua003:0/64] 2023-07-07 03:26:25,913 (trainer:732) INFO: 21epoch:train:4401-4500batch: iter_time=1.080e-04, forward_time=0.110, loss_ctc=63.186, loss_att=44.532, acc=0.715, loss=50.128, backward_time=0.755, grad_norm=80.740, clip=100.000, loss_scale=1.845e+19, optim_step_time=0.114, optim0_lr0=8.018e-05, train_time=2.002 +[gpua003:0/64] 2023-07-07 03:28:05,766 (trainer:732) INFO: 21epoch:train:4501-4600batch: iter_time=1.226e-04, forward_time=0.110, loss_ctc=82.770, loss_att=62.550, acc=0.696, loss=68.616, backward_time=0.753, grad_norm=110.536, clip=100.000, loss_scale=1.845e+19, optim_step_time=0.113, optim0_lr0=8.016e-05, train_time=1.997 +[gpua003:0/64] 2023-07-07 03:29:48,662 
(trainer:732) INFO: 21epoch:train:4601-4700batch: iter_time=1.171e-04, forward_time=0.111, loss_ctc=71.738, loss_att=52.977, acc=0.712, loss=58.605, backward_time=0.757, grad_norm=88.053, clip=100.000, loss_scale=1.845e+19, optim_step_time=0.113, optim0_lr0=8.014e-05, train_time=2.058 +[gpua003:0/64] 2023-07-07 03:31:28,763 (trainer:732) INFO: 21epoch:train:4701-4800batch: iter_time=1.118e-04, forward_time=0.110, loss_ctc=75.673, loss_att=54.196, acc=0.695, loss=60.639, backward_time=0.754, grad_norm=90.929, clip=100.000, loss_scale=1.845e+19, optim_step_time=0.113, optim0_lr0=8.011e-05, train_time=2.002 +[gpua003:0/64] 2023-07-07 03:33:08,783 (trainer:732) INFO: 21epoch:train:4801-4900batch: iter_time=1.194e-04, forward_time=0.110, loss_ctc=76.080, loss_att=56.995, acc=0.697, loss=62.720, backward_time=0.755, grad_norm=119.887, clip=100.000, loss_scale=1.845e+19, optim_step_time=0.114, optim0_lr0=8.009e-05, train_time=2.000 +[gpua003:0/64] 2023-07-07 03:34:49,915 (multiple_iter_factory:32) INFO: Building 6th iter-factory... +[gpua003:0/64] 2023-07-07 03:35:09,166 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-07 03:35:12,753 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-07 03:35:12,753 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10, +[gpua003:0/64] 2023-07-07 03:35:12,760 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-07 03:39:22,173 (trainer:732) INFO: 21epoch:train:4901-5000batch: iter_time=1.327, forward_time=0.110, loss_ctc=72.216, loss_att=51.391, acc=0.693, loss=57.639, backward_time=0.762, grad_norm=97.793, clip=100.000, loss_scale=1.845e+19, optim_step_time=0.114, optim0_lr0=8.007e-05, train_time=7.468 +[gpua003:0/64] 2023-07-07 03:41:06,488 (trainer:732) INFO: 21epoch:train:5001-5100batch: iter_time=1.047e-04, forward_time=0.109, loss_ctc=70.415, loss_att=54.270, acc=0.710, loss=59.113, backward_time=0.761, grad_norm=92.209, clip=100.000, loss_scale=1.845e+19, optim_step_time=0.114, optim0_lr0=8.005e-05, train_time=2.086 +[gpua003:0/64] 2023-07-07 03:42:46,655 (trainer:732) INFO: 21epoch:train:5101-5200batch: iter_time=9.585e-05, forward_time=0.109, loss_ctc=64.440, loss_att=51.288, acc=0.697, loss=55.233, backward_time=0.754, grad_norm=91.829, clip=100.000, loss_scale=1.845e+19, optim_step_time=0.114, optim0_lr0=8.003e-05, train_time=2.003 +[gpua003:0/64] 2023-07-07 03:44:26,435 (trainer:732) INFO: 21epoch:train:5201-5300batch: iter_time=1.122e-04, forward_time=0.108, loss_ctc=66.016, loss_att=47.983, acc=0.711, loss=53.393, backward_time=0.753, grad_norm=82.415, clip=100.000, loss_scale=1.845e+19, optim_step_time=0.114, optim0_lr0=8.001e-05, train_time=1.995 +[gpua003:0/64] 2023-07-07 03:46:06,273 (trainer:732) INFO: 21epoch:train:5301-5400batch: iter_time=1.066e-04, forward_time=0.109, loss_ctc=79.568, loss_att=58.274, acc=0.689, loss=64.662, backward_time=0.752, grad_norm=101.250, clip=100.000, 
loss_scale=1.845e+19, optim_step_time=0.114, optim0_lr0=7.999e-05, train_time=1.997 +[gpua003:0/64] 2023-07-07 03:47:45,977 (trainer:732) INFO: 21epoch:train:5401-5500batch: iter_time=9.211e-05, forward_time=0.108, loss_ctc=77.093, loss_att=57.211, acc=0.708, loss=63.176, backward_time=0.752, grad_norm=88.698, clip=100.000, loss_scale=1.845e+19, optim_step_time=0.114, optim0_lr0=7.997e-05, train_time=1.994 +[gpua003:0/64] 2023-07-07 03:49:25,739 (trainer:732) INFO: 21epoch:train:5501-5600batch: iter_time=1.027e-04, forward_time=0.108, loss_ctc=70.776, loss_att=51.574, acc=0.689, loss=57.334, backward_time=0.752, grad_norm=92.327, clip=100.000, loss_scale=1.845e+19, optim_step_time=0.113, optim0_lr0=7.995e-05, train_time=1.995 +[gpua003:0/64] 2023-07-07 03:51:05,561 (trainer:732) INFO: 21epoch:train:5601-5700batch: iter_time=9.823e-05, forward_time=0.109, loss_ctc=78.570, loss_att=59.212, acc=0.684, loss=65.020, backward_time=0.752, grad_norm=102.663, clip=100.000, loss_scale=1.845e+19, optim_step_time=0.114, optim0_lr0=7.993e-05, train_time=1.996 +[gpua003:0/64] 2023-07-07 03:52:45,407 (trainer:732) INFO: 21epoch:train:5701-5800batch: iter_time=1.062e-04, forward_time=0.108, loss_ctc=74.565, loss_att=51.158, acc=0.691, loss=58.180, backward_time=0.753, grad_norm=101.119, clip=100.000, loss_scale=1.845e+19, optim_step_time=0.114, optim0_lr0=7.991e-05, train_time=1.997 +[gpua003:0/64] 2023-07-07 03:53:20,039 (multiple_iter_factory:32) INFO: Building 7th iter-factory... +[gpua003:0/64] 2023-07-07 03:53:39,181 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-07 03:53:42,818 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-07 03:53:42,818 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9, +[gpua003:0/64] 2023-07-07 03:53:42,824 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-07 03:58:59,477 (trainer:732) INFO: 21epoch:train:5801-5900batch: iter_time=1.377, forward_time=0.109, loss_ctc=68.222, loss_att=54.113, acc=0.702, loss=58.346, backward_time=0.768, grad_norm=85.484, clip=100.000, loss_scale=1.845e+19, optim_step_time=0.113, optim0_lr0=7.989e-05, train_time=7.481 +[gpua003:0/64] 2023-07-07 04:00:40,031 (trainer:732) INFO: 21epoch:train:5901-6000batch: iter_time=9.701e-05, forward_time=0.108, loss_ctc=67.129, loss_att=54.573, acc=0.697, loss=58.340, backward_time=0.753, grad_norm=88.099, clip=100.000, loss_scale=1.845e+19, optim_step_time=0.113, optim0_lr0=7.987e-05, train_time=2.011 +[gpua003:0/64] 2023-07-07 04:02:20,207 (trainer:732) INFO: 21epoch:train:6001-6100batch: iter_time=9.362e-05, forward_time=0.109, loss_ctc=68.337, loss_att=48.416, acc=0.728, loss=54.392, backward_time=0.752, grad_norm=86.732, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.113, optim0_lr0=7.985e-05, train_time=2.003 +[gpua003:0/64] 2023-07-07 04:04:01,464 (trainer:732) INFO: 21epoch:train:6101-6200batch: iter_time=9.073e-05, 
forward_time=0.109, loss_ctc=75.111, loss_att=55.799, acc=0.695, loss=61.593, backward_time=0.761, grad_norm=94.382, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.113, optim0_lr0=7.983e-05, train_time=2.025 +[gpua003:0/64] 2023-07-07 04:05:41,365 (trainer:732) INFO: 21epoch:train:6201-6300batch: iter_time=9.518e-05, forward_time=0.109, loss_ctc=69.408, loss_att=51.348, acc=0.719, loss=56.766, backward_time=0.752, grad_norm=91.702, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.113, optim0_lr0=7.981e-05, train_time=1.998 +[gpua003:0/64] 2023-07-07 04:07:21,149 (trainer:732) INFO: 21epoch:train:6301-6400batch: iter_time=9.310e-05, forward_time=0.109, loss_ctc=72.354, loss_att=53.860, acc=0.695, loss=59.408, backward_time=0.752, grad_norm=82.091, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.113, optim0_lr0=7.979e-05, train_time=1.995 +[gpua003:0/64] 2023-07-07 04:09:01,102 (trainer:732) INFO: 21epoch:train:6401-6500batch: iter_time=9.926e-05, forward_time=0.109, loss_ctc=78.032, loss_att=54.051, acc=0.697, loss=61.245, backward_time=0.753, grad_norm=110.132, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.113, optim0_lr0=7.977e-05, train_time=1.999 +[gpua003:0/64] 2023-07-07 04:10:40,790 (trainer:732) INFO: 21epoch:train:6501-6600batch: iter_time=9.807e-05, forward_time=0.107, loss_ctc=78.055, loss_att=56.847, acc=0.705, loss=63.209, backward_time=0.752, grad_norm=115.245, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.113, optim0_lr0=7.975e-05, train_time=1.994 +[gpua003:0/64] 2023-07-07 04:11:49,314 (multiple_iter_factory:32) INFO: Building 8th iter-factory... +[gpua003:0/64] 2023-07-07 04:12:08,429 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-07 04:12:11,990 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-07 04:12:11,990 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1, +[gpua003:0/64] 2023-07-07 04:12:11,997 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-07 04:15:49,294 (trainer:732) INFO: 21epoch:train:6601-6700batch: iter_time=1.324, forward_time=0.109, loss_ctc=74.801, loss_att=60.444, acc=0.679, loss=64.751, backward_time=0.766, grad_norm=105.116, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.113, optim0_lr0=7.973e-05, train_time=6.170 +[gpua003:0/64] 2023-07-07 04:17:31,076 (trainer:732) INFO: 21epoch:train:6701-6800batch: iter_time=9.790e-05, forward_time=0.111, loss_ctc=67.289, loss_att=50.540, acc=0.720, loss=55.565, backward_time=0.757, grad_norm=81.960, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.113, optim0_lr0=7.971e-05, train_time=2.035 +[gpua003:0/64] 2023-07-07 04:19:10,966 (trainer:732) INFO: 21epoch:train:6801-6900batch: iter_time=1.034e-04, forward_time=0.108, loss_ctc=67.753, loss_att=53.586, acc=0.710, loss=57.836, backward_time=0.751, grad_norm=85.772, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.113, optim0_lr0=7.969e-05, 
train_time=1.998 +[gpua003:0/64] 2023-07-07 04:20:51,192 (trainer:732) INFO: 21epoch:train:6901-7000batch: iter_time=1.021e-04, forward_time=0.108, loss_ctc=62.269, loss_att=43.393, acc=0.718, loss=49.056, backward_time=0.752, grad_norm=94.628, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.112, optim0_lr0=7.967e-05, train_time=2.004 +[gpua003:0/64] 2023-07-07 04:22:31,278 (trainer:732) INFO: 21epoch:train:7001-7100batch: iter_time=1.125e-04, forward_time=0.109, loss_ctc=82.553, loss_att=62.057, acc=0.701, loss=68.206, backward_time=0.752, grad_norm=96.053, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.113, optim0_lr0=7.965e-05, train_time=2.002 +[gpua003:0/64] 2023-07-07 04:24:11,011 (trainer:732) INFO: 21epoch:train:7101-7200batch: iter_time=9.769e-05, forward_time=0.109, loss_ctc=73.617, loss_att=53.857, acc=0.710, loss=59.785, backward_time=0.752, grad_norm=106.016, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.113, optim0_lr0=7.963e-05, train_time=1.994 +[gpua003:0/64] 2023-07-07 04:25:51,930 (trainer:732) INFO: 21epoch:train:7201-7300batch: iter_time=2.744e-04, forward_time=0.119, loss_ctc=71.583, loss_att=52.769, acc=0.697, loss=58.413, backward_time=0.752, grad_norm=92.085, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.113, optim0_lr0=7.961e-05, train_time=2.018 +[gpua003:0/64] 2023-07-07 04:27:31,763 (trainer:732) INFO: 21epoch:train:7301-7400batch: iter_time=9.744e-05, forward_time=0.109, loss_ctc=78.262, loss_att=56.400, acc=0.700, loss=62.958, backward_time=0.753, grad_norm=108.333, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.113, optim0_lr0=7.959e-05, train_time=1.996 +[gpua003:0/64] 2023-07-07 04:29:11,616 (trainer:732) INFO: 21epoch:train:7401-7500batch: iter_time=9.512e-05, forward_time=0.109, loss_ctc=72.795, loss_att=51.382, acc=0.699, loss=57.806, backward_time=0.752, grad_norm=106.912, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.113, optim0_lr0=7.957e-05, train_time=1.997 +[gpua003:0/64] 2023-07-07 04:29:19,879 (multiple_iter_factory:32) INFO: Building 9th iter-factory... 
+[gpua003:0/64] 2023-07-07 04:29:39,014 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-07 04:29:44,220 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-07 04:29:44,313 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11, +[gpua003:0/64] 2023-07-07 04:29:44,320 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-07 04:34:49,745 (trainer:732) INFO: 21epoch:train:7501-7600batch: iter_time=1.925, forward_time=0.158, loss_ctc=69.045, loss_att=53.787, acc=0.717, loss=58.364, backward_time=0.770, grad_norm=91.648, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.116, optim0_lr0=7.955e-05, train_time=6.762 +[gpua003:0/64] 2023-07-07 04:36:30,383 (trainer:732) INFO: 21epoch:train:7601-7700batch: iter_time=9.981e-05, forward_time=0.109, loss_ctc=65.367, loss_att=51.408, acc=0.707, loss=55.596, backward_time=0.753, grad_norm=96.250, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.113, optim0_lr0=7.952e-05, train_time=2.013 +[gpua003:0/64] 2023-07-07 04:38:11,636 (trainer:732) INFO: 21epoch:train:7701-7800batch: iter_time=1.030e-04, forward_time=0.110, loss_ctc=65.290, loss_att=46.653, acc=0.718, loss=52.244, backward_time=0.752, grad_norm=97.699, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.112, optim0_lr0=7.950e-05, train_time=2.025 +[gpua003:0/64] 2023-07-07 04:39:52,657 (trainer:732) INFO: 21epoch:train:7801-7900batch: iter_time=9.248e-05, forward_time=0.116, loss_ctc=79.587, loss_att=57.577, acc=0.697, loss=64.180, backward_time=0.752, grad_norm=90.219, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.112, optim0_lr0=7.948e-05, train_time=2.020 +[gpua003:0/64] 2023-07-07 04:41:35,649 (trainer:732) INFO: 21epoch:train:7901-8000batch: iter_time=6.759e-04, forward_time=0.130, loss_ctc=75.540, loss_att=56.684, acc=0.718, loss=62.341, backward_time=0.758, grad_norm=87.590, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.113, optim0_lr0=7.946e-05, train_time=2.060 +[gpua003:0/64] 2023-07-07 04:43:22,167 (trainer:732) INFO: 21epoch:train:8001-8100batch: iter_time=1.011e-04, forward_time=0.149, loss_ctc=71.159, loss_att=52.801, acc=0.693, loss=58.309, backward_time=0.770, grad_norm=152.348, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.116, optim0_lr0=7.944e-05, train_time=2.130 +[gpua003:0/64] 2023-07-07 04:45:05,978 (trainer:732) INFO: 21epoch:train:8101-8200batch: iter_time=9.418e-05, forward_time=0.140, loss_ctc=76.922, loss_att=55.656, acc=0.697, loss=62.036, backward_time=0.756, grad_norm=115.315, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.114, optim0_lr0=7.942e-05, train_time=2.076 +[gpua003:0/64] 2023-07-07 04:46:48,173 (trainer:732) INFO: 21epoch:train:8201-8300batch: iter_time=1.139e-04, forward_time=0.112, loss_ctc=71.045, loss_att=50.537, acc=0.700, loss=56.690, backward_time=0.751, grad_norm=97.377, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.113, 
optim0_lr0=7.940e-05, train_time=2.044 +[gpua003:0/64] 2023-07-07 04:47:40,909 (multiple_iter_factory:32) INFO: Building 10th iter-factory... +[gpua003:0/64] 2023-07-07 04:48:00,341 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-07 04:48:04,000 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-07 04:48:04,001 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2, +[gpua003:0/64] 2023-07-07 04:48:04,007 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-07 04:53:34,303 (trainer:732) INFO: 21epoch:train:8301-8400batch: iter_time=2.928, forward_time=0.156, loss_ctc=74.000, loss_att=62.198, acc=0.690, loss=65.739, backward_time=0.775, grad_norm=93.122, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.113, optim0_lr0=7.938e-05, train_time=8.122 +[gpua003:0/64] 2023-07-07 04:55:17,196 (trainer:732) INFO: 21epoch:train:8401-8500batch: iter_time=1.028e-04, forward_time=0.111, loss_ctc=65.302, loss_att=52.812, acc=0.700, loss=56.559, backward_time=0.756, grad_norm=78.595, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.113, optim0_lr0=7.936e-05, train_time=2.058 +[gpua003:0/64] 2023-07-07 04:56:57,424 (trainer:732) INFO: 21epoch:train:8501-8600batch: iter_time=9.658e-05, forward_time=0.108, loss_ctc=68.684, loss_att=51.115, acc=0.721, loss=56.385, backward_time=0.750, grad_norm=82.716, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.113, optim0_lr0=7.934e-05, train_time=2.004 +[gpua003:0/64] 2023-07-07 04:58:38,276 (trainer:732) INFO: 21epoch:train:8601-8700batch: iter_time=1.075e-04, forward_time=0.108, loss_ctc=69.325, loss_att=51.087, acc=0.707, loss=56.559, backward_time=0.752, grad_norm=99.216, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.113, optim0_lr0=7.932e-05, train_time=2.017 +[gpua003:0/64] 2023-07-07 05:00:18,316 (trainer:732) INFO: 21epoch:train:8701-8800batch: iter_time=9.697e-05, forward_time=0.109, loss_ctc=73.945, loss_att=57.455, acc=0.694, loss=62.402, backward_time=0.753, grad_norm=89.756, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.113, optim0_lr0=7.930e-05, train_time=2.001 +[gpua003:0/64] 2023-07-07 05:01:58,070 (trainer:732) INFO: 21epoch:train:8801-8900batch: iter_time=1.013e-04, forward_time=0.109, loss_ctc=70.106, loss_att=52.451, acc=0.702, loss=57.747, backward_time=0.753, grad_norm=85.297, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.114, optim0_lr0=7.928e-05, train_time=1.995 +[gpua003:0/64] 2023-07-07 05:03:37,866 (trainer:732) INFO: 21epoch:train:8901-9000batch: iter_time=9.608e-05, forward_time=0.108, loss_ctc=76.088, loss_att=58.045, acc=0.688, loss=63.458, backward_time=0.752, grad_norm=106.493, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.113, optim0_lr0=7.926e-05, train_time=1.996 +[gpua003:0/64] 2023-07-07 05:05:17,660 (trainer:732) INFO: 21epoch:train:9001-9100batch: iter_time=9.909e-05, forward_time=0.108, loss_ctc=80.044, loss_att=57.867, 
acc=0.695, loss=64.520, backward_time=0.752, grad_norm=118.907, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.113, optim0_lr0=7.924e-05, train_time=1.996 +[gpua003:0/64] 2023-07-07 05:06:43,861 (multiple_iter_factory:32) INFO: Building 11th iter-factory... +[gpua003:0/64] 2023-07-07 05:07:03,332 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-07 05:07:07,163 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-07 05:07:07,164 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4, +[gpua003:0/64] 2023-07-07 05:07:07,170 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-07 05:11:58,302 (trainer:732) INFO: 21epoch:train:9101-9200batch: iter_time=1.887, forward_time=0.165, loss_ctc=73.392, loss_att=56.440, acc=0.687, loss=61.525, backward_time=0.766, grad_norm=116.633, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.115, optim0_lr0=7.922e-05, train_time=8.012 +[gpua003:0/64] 2023-07-07 05:13:38,749 (trainer:732) INFO: 21epoch:train:9201-9300batch: iter_time=9.300e-05, forward_time=0.107, loss_ctc=67.815, loss_att=50.596, acc=0.716, loss=55.762, backward_time=0.753, grad_norm=79.997, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.112, optim0_lr0=7.920e-05, train_time=2.009 +[gpua003:0/64] 2023-07-07 05:15:19,209 (trainer:732) INFO: 21epoch:train:9301-9400batch: iter_time=1.019e-04, forward_time=0.107, loss_ctc=66.582, loss_att=52.925, acc=0.708, loss=57.022, backward_time=0.752, grad_norm=93.157, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.112, optim0_lr0=7.919e-05, train_time=2.009 +[gpua003:0/64] 2023-07-07 05:16:59,576 (trainer:732) INFO: 21epoch:train:9401-9500batch: iter_time=9.374e-05, forward_time=0.107, loss_ctc=61.813, loss_att=45.211, acc=0.710, loss=50.192, backward_time=0.752, grad_norm=86.096, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.112, optim0_lr0=7.917e-05, train_time=2.007 +[gpua003:0/64] 2023-07-07 05:18:40,211 (trainer:732) INFO: 21epoch:train:9501-9600batch: iter_time=9.173e-05, forward_time=0.107, loss_ctc=81.797, loss_att=61.241, acc=0.697, loss=67.408, backward_time=0.752, grad_norm=95.878, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.111, optim0_lr0=7.915e-05, train_time=2.012 +[gpua003:0/64] 2023-07-07 05:20:20,733 (trainer:732) INFO: 21epoch:train:9601-9700batch: iter_time=9.476e-05, forward_time=0.107, loss_ctc=72.431, loss_att=53.032, acc=0.704, loss=58.852, backward_time=0.752, grad_norm=86.451, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.112, optim0_lr0=7.913e-05, train_time=2.010 +[gpua003:0/64] 2023-07-07 05:22:00,233 (trainer:732) INFO: 21epoch:train:9701-9800batch: iter_time=9.436e-05, forward_time=0.107, loss_ctc=72.639, loss_att=53.426, acc=0.691, loss=59.190, backward_time=0.751, grad_norm=95.931, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.112, optim0_lr0=7.911e-05, train_time=1.990 +[gpua003:0/64] 2023-07-07 05:23:39,817 
(trainer:732) INFO: 21epoch:train:9801-9900batch: iter_time=8.872e-05, forward_time=0.108, loss_ctc=76.251, loss_att=59.146, acc=0.691, loss=64.277, backward_time=0.752, grad_norm=114.109, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.112, optim0_lr0=7.909e-05, train_time=1.991 +[gpua003:0/64] 2023-07-07 05:25:19,445 (trainer:732) INFO: 21epoch:train:9901-10000batch: iter_time=9.037e-05, forward_time=0.107, loss_ctc=73.224, loss_att=50.857, acc=0.694, loss=57.567, backward_time=0.752, grad_norm=105.676, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.112, optim0_lr0=7.907e-05, train_time=1.992 +[gpua003:0/64] 2023-07-07 05:38:40,911 (trainer:338) INFO: 21epoch results: [train] iter_time=0.235, forward_time=0.113, loss_ctc=72.893, loss_att=54.497, acc=0.699, loss=60.016, backward_time=0.755, grad_norm=97.715, clip=100.000, loss_scale=2.398e+19, optim_step_time=0.113, optim0_lr0=8.007e-05, train_time=2.656, time=3 hours, 41 minutes and 44.21 seconds, total_count=180000, gpu_max_cached_mem_GB=37.779, [valid] loss_ctc=50.186, cer_ctc=0.290, loss_att=41.004, acc=0.654, cer=0.409, wer=0.994, loss=43.759, time=6 minutes and 51.3 seconds, total_count=18722, gpu_max_cached_mem_GB=37.779, [att_plot] time=6 minutes and 7.7 seconds, total_count=0, gpu_max_cached_mem_GB=37.779 +[gpua003:0/64] 2023-07-07 05:38:59,977 (trainer:386) INFO: The best model has been updated: valid.total_count +[gpua003:0/64] 2023-07-07 05:39:00,026 (trainer:440) INFO: The model files were removed: exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/14epoch.pth +[gpua003:0/64] 2023-07-07 05:39:00,083 (trainer:272) INFO: 22/100epoch started. Estimated time to finish: 1 week, 5 days and 17 hours +[gpua003:0/64] 2023-07-07 05:39:01,612 (multiple_iter_factory:32) INFO: Building 0th iter-factory... 
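The "Building Nth iter-factory" messages that punctuate this log (one roughly every 800-900 batches, matching 10000 logged batches per epoch spread over 12 shards) come from --multiple_iterator true: the training data was pre-split into 12 shards (splits12/split.0 through split.11), and each epoch streams the shards one at a time in a freshly shuffled order, as the split numbers in the dataset blocks below show. A rough sketch of that pattern in Python, with `shards` as a hypothetical list of per-split batch iterables (illustrative only, not ESPnet's implementation):

import random

def epoch_batches(shards, epoch):
    # Visit every shard exactly once per epoch, in an order reshuffled
    # each epoch, streaming one shard at a time so only one split's
    # batches need to be materialized at once.
    order = list(range(len(shards)))
    random.Random(epoch).shuffle(order)
    for i in order:
        yield from shards[i]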
+[gpua003:0/64] 2023-07-07 05:39:20,669 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-07 05:39:24,275 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-07 05:39:24,275 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1, +[gpua003:0/64] 2023-07-07 05:39:24,379 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-07 05:46:09,882 (trainer:732) INFO: 22epoch:train:1-100batch: iter_time=3.232, forward_time=0.130, loss_ctc=75.959, loss_att=57.589, acc=0.700, loss=63.100, backward_time=0.770, grad_norm=94.575, clip=100.000, loss_scale=7.379e+19, optim_step_time=0.114, optim0_lr0=7.905e-05, train_time=8.584 +[gpua003:0/64] 2023-07-07 05:47:51,050 (trainer:732) INFO: 22epoch:train:101-200batch: iter_time=9.855e-05, forward_time=0.109, loss_ctc=68.208, loss_att=52.446, acc=0.682, loss=57.174, backward_time=0.756, grad_norm=97.364, clip=100.000, loss_scale=7.379e+19, optim_step_time=0.114, optim0_lr0=7.903e-05, train_time=2.023 +[gpua003:0/64] 2023-07-07 05:49:30,923 (trainer:732) INFO: 22epoch:train:201-300batch: iter_time=9.754e-05, forward_time=0.110, loss_ctc=76.840, loss_att=59.608, acc=0.719, loss=64.777, backward_time=0.753, grad_norm=95.370, clip=100.000, loss_scale=7.379e+19, optim_step_time=0.114, optim0_lr0=7.901e-05, train_time=1.997 +[gpua003:0/64] 2023-07-07 05:51:10,776 (trainer:732) INFO: 22epoch:train:301-400batch: iter_time=9.949e-05, forward_time=0.110, loss_ctc=77.024, loss_att=65.093, acc=0.681, loss=68.673, backward_time=0.752, grad_norm=106.966, clip=100.000, loss_scale=7.379e+19, optim_step_time=0.114, optim0_lr0=7.899e-05, train_time=1.997 +[gpua003:0/64] 2023-07-07 05:52:55,638 (trainer:732) INFO: 22epoch:train:401-500batch: iter_time=9.878e-05, forward_time=0.109, loss_ctc=70.205, loss_att=56.739, acc=0.696, loss=60.779, backward_time=0.761, grad_norm=90.649, clip=100.000, loss_scale=7.379e+19, optim_step_time=0.113, optim0_lr0=7.897e-05, train_time=2.097 +[gpua003:0/64] 2023-07-07 05:54:39,214 (trainer:732) INFO: 22epoch:train:501-600batch: iter_time=1.011e-04, forward_time=0.109, loss_ctc=67.033, loss_att=51.629, acc=0.698, loss=56.250, backward_time=0.754, grad_norm=93.014, clip=100.000, loss_scale=7.379e+19, optim_step_time=0.114, optim0_lr0=7.895e-05, train_time=2.071 +[gpua003:0/64] 2023-07-07 05:56:20,364 (trainer:732) INFO: 22epoch:train:601-700batch: iter_time=1.016e-04, forward_time=0.109, loss_ctc=65.903, loss_att=46.926, acc=0.685, loss=52.619, backward_time=0.751, grad_norm=81.828, clip=100.000, loss_scale=7.379e+19, optim_step_time=0.113, optim0_lr0=7.893e-05, train_time=2.023 +[gpua003:0/64] 2023-07-07 05:58:00,763 (trainer:732) INFO: 22epoch:train:701-800batch: iter_time=9.919e-05, forward_time=0.108, loss_ctc=77.598, loss_att=57.244, acc=0.698, loss=63.350, backward_time=0.751, grad_norm=97.156, clip=100.000, loss_scale=7.379e+19, optim_step_time=0.113, optim0_lr0=7.891e-05, train_time=2.008 
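The loss column in the trainer lines is consistent with ESPnet's hybrid CTC/attention objective, loss = ctc_weight * loss_ctc + (1 - ctc_weight) * loss_att with ctc_weight = 0.3; the weight is inferred from the logged numbers here, since the training config itself is not reproduced in this log. A minimal check against the entries above:

def combined_loss(loss_ctc, loss_att, ctc_weight=0.3):
    # Assumed hybrid CTC/attention combination; ctc_weight=0.3 is inferred
    # from the logged values, not read from the config file.
    return ctc_weight * loss_ctc + (1.0 - ctc_weight) * loss_att

# 22epoch:train:701-800batch above: loss_ctc=77.598, loss_att=57.244, loss=63.350
assert abs(combined_loss(77.598, 57.244) - 63.350) < 5e-3
# 21epoch results: loss_ctc=72.893, loss_att=54.497, loss=60.016
assert abs(combined_loss(72.893, 54.497) - 60.016) < 5e-3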
+[gpua003:0/64] 2023-07-07 05:58:40,682 (multiple_iter_factory:32) INFO: Building 1th iter-factory... +[gpua003:0/64] 2023-07-07 05:58:59,667 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-07 05:59:03,212 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-07 05:59:03,212 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6, +[gpua003:0/64] 2023-07-07 05:59:03,218 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-07 06:03:49,036 (trainer:732) INFO: 22epoch:train:801-900batch: iter_time=1.356, forward_time=0.139, loss_ctc=73.151, loss_att=56.666, acc=0.700, loss=61.612, backward_time=0.769, grad_norm=98.640, clip=100.000, loss_scale=7.379e+19, optim_step_time=0.115, optim0_lr0=7.889e-05, train_time=6.965 +[gpua003:0/64] 2023-07-07 06:05:29,630 (trainer:732) INFO: 22epoch:train:901-1000batch: iter_time=9.770e-05, forward_time=0.110, loss_ctc=68.300, loss_att=50.098, acc=0.687, loss=55.559, backward_time=0.754, grad_norm=100.634, clip=100.000, loss_scale=7.379e+19, optim_step_time=0.113, optim0_lr0=7.887e-05, train_time=2.012 +[gpua003:0/64] 2023-07-07 06:07:09,505 (trainer:732) INFO: 22epoch:train:1001-1100batch: iter_time=1.005e-04, forward_time=0.108, loss_ctc=73.251, loss_att=59.729, acc=0.702, loss=63.786, backward_time=0.752, grad_norm=99.591, clip=100.000, loss_scale=7.379e+19, optim_step_time=0.113, optim0_lr0=7.885e-05, train_time=1.997 +[gpua003:0/64] 2023-07-07 06:08:52,104 (trainer:732) INFO: 22epoch:train:1101-1200batch: iter_time=1.038e-04, forward_time=0.109, loss_ctc=71.657, loss_att=55.743, acc=0.704, loss=60.517, backward_time=0.765, grad_norm=85.238, clip=100.000, loss_scale=7.379e+19, optim_step_time=0.113, optim0_lr0=7.883e-05, train_time=2.052 +[gpua003:0/64] 2023-07-07 06:10:32,190 (trainer:732) INFO: 22epoch:train:1201-1300batch: iter_time=1.035e-04, forward_time=0.109, loss_ctc=76.079, loss_att=65.762, acc=0.682, loss=68.857, backward_time=0.753, grad_norm=102.836, clip=100.000, loss_scale=7.379e+19, optim_step_time=0.113, optim0_lr0=7.881e-05, train_time=2.001 +[gpua003:0/64] 2023-07-07 06:12:12,065 (trainer:732) INFO: 22epoch:train:1301-1400batch: iter_time=9.626e-05, forward_time=0.109, loss_ctc=69.609, loss_att=55.901, acc=0.688, loss=60.014, backward_time=0.753, grad_norm=115.398, clip=100.000, loss_scale=7.379e+19, optim_step_time=0.113, optim0_lr0=7.879e-05, train_time=1.997 +[gpua003:0/64] 2023-07-07 06:13:52,048 (trainer:732) INFO: 22epoch:train:1401-1500batch: iter_time=8.880e-05, forward_time=0.109, loss_ctc=61.548, loss_att=44.239, acc=0.684, loss=49.431, backward_time=0.753, grad_norm=108.769, clip=100.000, loss_scale=7.379e+19, optim_step_time=0.113, optim0_lr0=7.877e-05, train_time=1.999 +[gpua003:0/64] 2023-07-07 06:15:31,817 (trainer:732) INFO: 22epoch:train:1501-1600batch: iter_time=9.085e-05, forward_time=0.109, loss_ctc=73.610, loss_att=51.494, acc=0.698, loss=58.129, 
backward_time=0.753, grad_norm=95.261, clip=100.000, loss_scale=7.379e+19, optim_step_time=0.113, optim0_lr0=7.875e-05, train_time=1.995 +[gpua003:0/64] 2023-07-07 06:16:54,392 (multiple_iter_factory:32) INFO: Building 2th iter-factory... +[gpua003:0/64] 2023-07-07 06:17:13,867 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-07 06:17:17,499 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-07 06:17:17,499 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5, +[gpua003:0/64] 2023-07-07 06:17:17,506 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-07 06:21:41,543 (trainer:732) INFO: 22epoch:train:1601-1700batch: iter_time=2.544, forward_time=0.118, loss_ctc=71.770, loss_att=56.335, acc=0.704, loss=60.965, backward_time=0.763, grad_norm=88.321, clip=100.000, loss_scale=7.379e+19, optim_step_time=0.113, optim0_lr0=7.873e-05, train_time=7.394 +[gpua003:0/64] 2023-07-07 06:23:24,648 (trainer:732) INFO: 22epoch:train:1701-1800batch: iter_time=8.832e-05, forward_time=0.108, loss_ctc=72.678, loss_att=52.151, acc=0.698, loss=58.309, backward_time=0.758, grad_norm=95.685, clip=100.000, loss_scale=7.379e+19, optim_step_time=0.113, optim0_lr0=7.871e-05, train_time=2.062 +[gpua003:0/64] 2023-07-07 06:25:06,463 (trainer:732) INFO: 22epoch:train:1801-1900batch: iter_time=9.287e-05, forward_time=0.122, loss_ctc=66.852, loss_att=52.819, acc=0.716, loss=57.029, backward_time=0.755, grad_norm=93.289, clip=100.000, loss_scale=7.379e+19, optim_step_time=0.115, optim0_lr0=7.869e-05, train_time=2.036 +[gpua003:0/64] 2023-07-07 06:26:47,146 (trainer:732) INFO: 22epoch:train:1901-2000batch: iter_time=1.001e-04, forward_time=0.114, loss_ctc=77.777, loss_att=64.279, acc=0.708, loss=68.329, backward_time=0.754, grad_norm=107.242, clip=100.000, loss_scale=7.379e+19, optim_step_time=0.115, optim0_lr0=7.867e-05, train_time=2.013 +[gpua003:0/64] 2023-07-07 06:28:27,163 (trainer:732) INFO: 22epoch:train:2001-2100batch: iter_time=9.088e-05, forward_time=0.109, loss_ctc=75.746, loss_att=62.285, acc=0.691, loss=66.323, backward_time=0.754, grad_norm=93.071, clip=100.000, loss_scale=7.379e+19, optim_step_time=0.113, optim0_lr0=7.865e-05, train_time=2.000 +[gpua003:0/64] 2023-07-07 06:30:09,221 (trainer:732) INFO: 22epoch:train:2101-2200batch: iter_time=8.882e-05, forward_time=0.123, loss_ctc=70.706, loss_att=58.251, acc=0.693, loss=61.987, backward_time=0.758, grad_norm=99.862, clip=100.000, loss_scale=7.379e+19, optim_step_time=0.113, optim0_lr0=7.863e-05, train_time=2.041 +[gpua003:0/64] 2023-07-07 06:31:49,013 (trainer:732) INFO: 22epoch:train:2201-2300batch: iter_time=8.759e-05, forward_time=0.108, loss_ctc=56.761, loss_att=45.017, acc=0.696, loss=48.540, backward_time=0.752, grad_norm=80.159, clip=100.000, loss_scale=7.379e+19, optim_step_time=0.113, optim0_lr0=7.862e-05, train_time=1.996 +[gpua003:0/64] 2023-07-07 06:33:28,852 (trainer:732) INFO: 
22epoch:train:2301-2400batch: iter_time=9.062e-05, forward_time=0.108, loss_ctc=69.849, loss_att=49.995, acc=0.703, loss=55.951, backward_time=0.751, grad_norm=112.615, clip=100.000, loss_scale=7.379e+19, optim_step_time=0.113, optim0_lr0=7.860e-05, train_time=1.997 +[gpua003:0/64] 2023-07-07 06:35:09,707 (multiple_iter_factory:32) INFO: Building 3th iter-factory... +[gpua003:0/64] 2023-07-07 06:35:28,850 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-07 06:35:32,446 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-07 06:35:32,446 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3, +[gpua003:0/64] 2023-07-07 06:35:32,453 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-07 06:39:06,065 (trainer:732) INFO: 22epoch:train:2401-2500batch: iter_time=1.292, forward_time=0.109, loss_ctc=71.798, loss_att=50.317, acc=0.717, loss=56.761, backward_time=0.759, grad_norm=95.019, clip=100.000, loss_scale=7.379e+19, optim_step_time=0.113, optim0_lr0=7.858e-05, train_time=6.744 +[gpua003:0/64] 2023-07-07 06:40:51,328 (trainer:732) INFO: 22epoch:train:2501-2600batch: iter_time=1.031e-04, forward_time=0.109, loss_ctc=75.859, loss_att=56.028, acc=0.710, loss=61.978, backward_time=0.760, grad_norm=95.952, clip=100.000, loss_scale=7.379e+19, optim_step_time=0.113, optim0_lr0=7.856e-05, train_time=2.105 +[gpua003:0/64] 2023-07-07 06:42:31,354 (trainer:732) INFO: 22epoch:train:2601-2700batch: iter_time=1.082e-04, forward_time=0.108, loss_ctc=66.166, loss_att=50.393, acc=0.695, loss=55.125, backward_time=0.751, grad_norm=86.983, clip=100.000, loss_scale=7.379e+19, optim_step_time=0.113, optim0_lr0=7.854e-05, train_time=2.000 +[gpua003:0/64] 2023-07-07 06:44:10,962 (trainer:732) INFO: 22epoch:train:2701-2800batch: iter_time=1.081e-04, forward_time=0.108, loss_ctc=77.042, loss_att=58.748, acc=0.721, loss=64.236, backward_time=0.750, grad_norm=99.652, clip=100.000, loss_scale=7.379e+19, optim_step_time=0.113, optim0_lr0=7.852e-05, train_time=1.992 +[gpua003:0/64] 2023-07-07 06:45:50,837 (trainer:732) INFO: 22epoch:train:2801-2900batch: iter_time=1.088e-04, forward_time=0.109, loss_ctc=73.184, loss_att=61.853, acc=0.694, loss=65.253, backward_time=0.752, grad_norm=101.544, clip=100.000, loss_scale=7.379e+19, optim_step_time=0.113, optim0_lr0=7.850e-05, train_time=1.997 +[gpua003:0/64] 2023-07-07 06:47:30,553 (trainer:732) INFO: 22epoch:train:2901-3000batch: iter_time=1.154e-04, forward_time=0.109, loss_ctc=70.952, loss_att=57.515, acc=0.703, loss=61.546, backward_time=0.751, grad_norm=97.436, clip=100.000, loss_scale=7.379e+19, optim_step_time=0.113, optim0_lr0=7.848e-05, train_time=1.994 +[gpua003:0/64] 2023-07-07 06:49:10,499 (trainer:732) INFO: 22epoch:train:3001-3100batch: iter_time=1.189e-04, forward_time=0.109, loss_ctc=62.039, loss_att=47.304, acc=0.712, loss=51.724, backward_time=0.753, grad_norm=90.810, clip=100.000, loss_scale=7.379e+19, 
optim_step_time=0.113, optim0_lr0=7.846e-05, train_time=1.999 +[gpua003:0/64] 2023-07-07 06:50:50,138 (trainer:732) INFO: 22epoch:train:3101-3200batch: iter_time=1.267e-04, forward_time=0.109, loss_ctc=66.663, loss_att=46.048, acc=0.693, loss=52.232, backward_time=0.750, grad_norm=86.756, clip=100.000, loss_scale=7.379e+19, optim_step_time=0.113, optim0_lr0=7.844e-05, train_time=1.993 +[gpua003:0/64] 2023-07-07 06:52:32,525 (trainer:732) INFO: 22epoch:train:3201-3300batch: iter_time=1.227e-04, forward_time=0.110, loss_ctc=72.678, loss_att=54.618, acc=0.711, loss=60.036, backward_time=0.757, grad_norm=104.682, clip=100.000, loss_scale=7.379e+19, optim_step_time=0.113, optim0_lr0=7.842e-05, train_time=2.048 +[gpua003:0/64] 2023-07-07 06:53:06,741 (multiple_iter_factory:32) INFO: Building 4th iter-factory... +[gpua003:0/64] 2023-07-07 06:53:26,004 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-07 06:53:29,609 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-07 06:53:29,609 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0, +[gpua003:0/64] 2023-07-07 06:53:30,022 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-07 06:57:58,983 (trainer:732) INFO: 22epoch:train:3301-3400batch: iter_time=1.348, forward_time=0.145, loss_ctc=72.094, loss_att=56.366, acc=0.703, loss=61.085, backward_time=0.769, grad_norm=89.951, clip=100.000, loss_scale=7.379e+19, optim_step_time=0.114, optim0_lr0=7.840e-05, train_time=6.529 +[gpua003:0/64] 2023-07-07 06:59:39,298 (trainer:732) INFO: 22epoch:train:3401-3500batch: iter_time=1.038e-04, forward_time=0.110, loss_ctc=66.447, loss_att=49.854, acc=0.686, loss=54.832, backward_time=0.753, grad_norm=93.159, clip=100.000, loss_scale=7.379e+19, optim_step_time=0.112, optim0_lr0=7.838e-05, train_time=2.006 +[gpua003:0/64] 2023-07-07 07:01:19,264 (trainer:732) INFO: 22epoch:train:3501-3600batch: iter_time=9.217e-05, forward_time=0.109, loss_ctc=75.507, loss_att=61.068, acc=0.704, loss=65.400, backward_time=0.751, grad_norm=96.972, clip=100.000, loss_scale=7.379e+19, optim_step_time=0.112, optim0_lr0=7.836e-05, train_time=1.999 +[gpua003:0/64] 2023-07-07 07:02:59,198 (trainer:732) INFO: 22epoch:train:3601-3700batch: iter_time=1.088e-04, forward_time=0.109, loss_ctc=70.999, loss_att=57.684, acc=0.705, loss=61.678, backward_time=0.752, grad_norm=85.253, clip=100.000, loss_scale=7.379e+19, optim_step_time=0.112, optim0_lr0=7.834e-05, train_time=1.998 +[gpua003:0/64] 2023-07-07 07:04:38,930 (trainer:732) INFO: 22epoch:train:3701-3800batch: iter_time=1.136e-04, forward_time=0.109, loss_ctc=72.958, loss_att=61.007, acc=0.688, loss=64.592, backward_time=0.750, grad_norm=110.731, clip=100.000, loss_scale=7.379e+19, optim_step_time=0.112, optim0_lr0=7.833e-05, train_time=1.994 +[gpua003:0/64] 2023-07-07 07:06:18,662 (trainer:732) INFO: 22epoch:train:3801-3900batch: iter_time=1.150e-04, forward_time=0.109, 
loss_ctc=69.973, loss_att=57.379, acc=0.688, loss=61.157, backward_time=0.751, grad_norm=104.773, clip=100.000, loss_scale=7.379e+19, optim_step_time=0.113, optim0_lr0=7.831e-05, train_time=1.994 +[gpua003:0/64] 2023-07-07 07:07:58,470 (trainer:732) INFO: 22epoch:train:3901-4000batch: iter_time=1.153e-04, forward_time=0.109, loss_ctc=58.578, loss_att=42.394, acc=0.697, loss=47.249, backward_time=0.752, grad_norm=78.336, clip=100.000, loss_scale=7.379e+19, optim_step_time=0.112, optim0_lr0=7.829e-05, train_time=1.996 +[gpua003:0/64] 2023-07-07 07:09:38,304 (trainer:732) INFO: 22epoch:train:4001-4100batch: iter_time=9.816e-05, forward_time=0.108, loss_ctc=72.183, loss_att=51.473, acc=0.699, loss=57.686, backward_time=0.753, grad_norm=90.776, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.113, optim0_lr0=7.827e-05, train_time=1.996 +[gpua003:0/64] 2023-07-07 07:10:44,632 (multiple_iter_factory:32) INFO: Building 5th iter-factory... +[gpua003:0/64] 2023-07-07 07:11:04,150 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-07 07:11:07,729 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-07 07:11:07,729 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8, +[gpua003:0/64] 2023-07-07 07:11:07,735 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-07 07:14:13,080 (trainer:732) INFO: 22epoch:train:4101-4200batch: iter_time=1.318, forward_time=0.108, loss_ctc=71.941, loss_att=54.810, acc=0.713, loss=59.949, backward_time=0.764, grad_norm=86.300, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.112, optim0_lr0=7.825e-05, train_time=5.495 +[gpua003:0/64] 2023-07-07 07:15:54,008 (trainer:732) INFO: 22epoch:train:4201-4300batch: iter_time=1.036e-04, forward_time=0.109, loss_ctc=70.358, loss_att=50.448, acc=0.689, loss=56.421, backward_time=0.755, grad_norm=99.875, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.112, optim0_lr0=7.823e-05, train_time=2.018 +[gpua003:0/64] 2023-07-07 07:17:35,127 (trainer:732) INFO: 22epoch:train:4301-4400batch: iter_time=1.074e-04, forward_time=0.108, loss_ctc=70.078, loss_att=57.577, acc=0.697, loss=61.327, backward_time=0.751, grad_norm=93.447, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.112, optim0_lr0=7.821e-05, train_time=2.022 +[gpua003:0/64] 2023-07-07 07:19:17,039 (trainer:732) INFO: 22epoch:train:4401-4500batch: iter_time=8.866e-05, forward_time=0.108, loss_ctc=73.720, loss_att=60.764, acc=0.703, loss=64.650, backward_time=0.759, grad_norm=98.011, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.113, optim0_lr0=7.819e-05, train_time=2.038 +[gpua003:0/64] 2023-07-07 07:20:57,799 (trainer:732) INFO: 22epoch:train:4501-4600batch: iter_time=9.315e-05, forward_time=0.110, loss_ctc=74.002, loss_att=60.934, acc=0.685, loss=64.855, backward_time=0.754, grad_norm=99.614, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.113, optim0_lr0=7.817e-05, train_time=2.015 
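The loss_scale jump from 7.379e+19 to 1.476e+20 inside the 4001-4100 block above is an exact doubling, and every loss_scale value in this log is a power of two (1.845e+19 = 2**64 up through 2.951e+20 = 2**68). That is the signature of dynamic loss scaling in mixed-precision training: the scale doubles after a fixed run of overflow-free steps and is halved when gradients overflow. A sketch of the standard PyTorch AMP loop with those defaults (placeholder model/optimizer/batch, not the ESPnet trainer's internals):

import torch

# growth_factor=2.0 and backoff_factor=0.5 yield exactly the power-of-two
# loss_scale trajectory seen in this log.
scaler = torch.cuda.amp.GradScaler(growth_factor=2.0, backoff_factor=0.5,
                                   growth_interval=2000)

def amp_step(model, optimizer, batch):
    with torch.autocast("cuda"):
        loss = model(batch)          # forward pass in reduced precision
    scaler.scale(loss).backward()    # backward on the scaled loss
    scaler.step(optimizer)           # skipped if inf/nan gradients are found
    scaler.update()                  # grows or backs off the scale
    optimizer.zero_grad()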
+[gpua003:0/64] 2023-07-07 07:22:39,059 (trainer:732) INFO: 22epoch:train:4601-4700batch: iter_time=1.059e-04, forward_time=0.109, loss_ctc=69.200, loss_att=56.517, acc=0.691, loss=60.322, backward_time=0.752, grad_norm=101.222, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.113, optim0_lr0=7.815e-05, train_time=2.025 +[gpua003:0/64] 2023-07-07 07:24:20,772 (trainer:732) INFO: 22epoch:train:4701-4800batch: iter_time=1.095e-04, forward_time=0.109, loss_ctc=61.538, loss_att=45.833, acc=0.689, loss=50.545, backward_time=0.754, grad_norm=95.781, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.113, optim0_lr0=7.813e-05, train_time=2.034 +[gpua003:0/64] 2023-07-07 07:26:02,965 (trainer:732) INFO: 22epoch:train:4801-4900batch: iter_time=1.033e-04, forward_time=0.109, loss_ctc=69.082, loss_att=51.514, acc=0.696, loss=56.785, backward_time=0.752, grad_norm=95.683, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.112, optim0_lr0=7.811e-05, train_time=2.044 +[gpua003:0/64] 2023-07-07 07:27:49,767 (multiple_iter_factory:32) INFO: Building 6th iter-factory... +[gpua003:0/64] 2023-07-07 07:28:09,330 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-07 07:28:12,926 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-07 07:28:12,926 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4, +[gpua003:0/64] 2023-07-07 07:28:12,933 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-07 07:31:57,955 (trainer:732) INFO: 22epoch:train:4901-5000batch: iter_time=2.503, forward_time=0.127, loss_ctc=69.914, loss_att=52.926, acc=0.705, loss=58.022, backward_time=0.762, grad_norm=90.567, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.113, optim0_lr0=7.810e-05, train_time=7.100 +[gpua003:0/64] 2023-07-07 07:33:40,006 (trainer:732) INFO: 22epoch:train:5001-5100batch: iter_time=1.018e-04, forward_time=0.109, loss_ctc=75.894, loss_att=56.058, acc=0.706, loss=62.009, backward_time=0.760, grad_norm=95.605, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.113, optim0_lr0=7.808e-05, train_time=2.041 +[gpua003:0/64] 2023-07-07 07:35:20,731 (trainer:732) INFO: 22epoch:train:5101-5200batch: iter_time=1.041e-04, forward_time=0.108, loss_ctc=65.260, loss_att=50.518, acc=0.693, loss=54.941, backward_time=0.754, grad_norm=95.597, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.113, optim0_lr0=7.806e-05, train_time=2.014 +[gpua003:0/64] 2023-07-07 07:37:03,337 (trainer:732) INFO: 22epoch:train:5201-5300batch: iter_time=9.542e-05, forward_time=0.109, loss_ctc=75.495, loss_att=58.850, acc=0.712, loss=63.844, backward_time=0.755, grad_norm=96.146, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.113, optim0_lr0=7.804e-05, train_time=2.052 +[gpua003:0/64] 2023-07-07 07:38:52,102 (trainer:732) INFO: 22epoch:train:5301-5400batch: iter_time=9.699e-05, forward_time=0.109, loss_ctc=72.248, loss_att=62.576, acc=0.684, loss=65.477, 
backward_time=0.759, grad_norm=109.912, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.113, optim0_lr0=7.802e-05, train_time=2.175 +[gpua003:0/64] 2023-07-07 07:40:47,676 (trainer:732) INFO: 22epoch:train:5401-5500batch: iter_time=9.981e-05, forward_time=0.108, loss_ctc=69.974, loss_att=56.541, acc=0.690, loss=60.571, backward_time=0.802, grad_norm=96.604, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.112, optim0_lr0=7.800e-05, train_time=2.311 +[gpua003:0/64] 2023-07-07 07:42:29,427 (trainer:732) INFO: 22epoch:train:5501-5600batch: iter_time=1.005e-04, forward_time=0.108, loss_ctc=62.758, loss_att=47.686, acc=0.702, loss=52.208, backward_time=0.762, grad_norm=88.346, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.112, optim0_lr0=7.798e-05, train_time=2.035 +[gpua003:0/64] 2023-07-07 07:44:09,193 (trainer:732) INFO: 22epoch:train:5601-5700batch: iter_time=1.001e-04, forward_time=0.109, loss_ctc=65.100, loss_att=45.891, acc=0.696, loss=51.654, backward_time=0.751, grad_norm=81.829, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.112, optim0_lr0=7.796e-05, train_time=1.995 +[gpua003:0/64] 2023-07-07 07:45:53,335 (trainer:732) INFO: 22epoch:train:5701-5800batch: iter_time=9.999e-05, forward_time=0.108, loss_ctc=74.308, loss_att=57.186, acc=0.695, loss=62.323, backward_time=0.767, grad_norm=150.463, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.112, optim0_lr0=7.794e-05, train_time=2.083 +[gpua003:0/64] 2023-07-07 07:46:33,767 (multiple_iter_factory:32) INFO: Building 7th iter-factory... +[gpua003:0/64] 2023-07-07 07:46:52,851 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-07 07:46:56,470 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-07 07:46:56,470 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11, +[gpua003:0/64] 2023-07-07 07:46:56,477 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-07 07:50:29,089 (trainer:732) INFO: 22epoch:train:5801-5900batch: iter_time=1.584, forward_time=0.131, loss_ctc=74.207, loss_att=54.322, acc=0.717, loss=60.288, backward_time=0.764, grad_norm=91.726, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.113, optim0_lr0=7.792e-05, train_time=5.515 +[gpua003:0/64] 2023-07-07 07:52:09,734 (trainer:732) INFO: 22epoch:train:5901-6000batch: iter_time=9.734e-05, forward_time=0.110, loss_ctc=65.678, loss_att=47.794, acc=0.691, loss=53.160, backward_time=0.752, grad_norm=83.021, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.113, optim0_lr0=7.791e-05, train_time=2.013 +[gpua003:0/64] 2023-07-07 07:53:50,393 (trainer:732) INFO: 22epoch:train:6001-6100batch: iter_time=9.193e-05, forward_time=0.112, loss_ctc=74.747, loss_att=60.495, acc=0.721, loss=64.771, backward_time=0.755, grad_norm=90.014, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.113, optim0_lr0=7.789e-05, train_time=2.013 +[gpua003:0/64] 2023-07-07 07:55:31,643 (trainer:732) INFO: 
22epoch:train:6101-6200batch: iter_time=9.239e-05, forward_time=0.119, loss_ctc=73.282, loss_att=60.349, acc=0.698, loss=64.229, backward_time=0.756, grad_norm=88.930, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.113, optim0_lr0=7.787e-05, train_time=2.025 +[gpua003:0/64] 2023-07-07 07:57:11,271 (trainer:732) INFO: 22epoch:train:6201-6300batch: iter_time=9.293e-05, forward_time=0.108, loss_ctc=67.412, loss_att=54.664, acc=0.707, loss=58.488, backward_time=0.751, grad_norm=95.356, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.113, optim0_lr0=7.785e-05, train_time=1.992 +[gpua003:0/64] 2023-07-07 07:58:51,149 (trainer:732) INFO: 22epoch:train:6301-6400batch: iter_time=9.666e-05, forward_time=0.109, loss_ctc=66.473, loss_att=51.695, acc=0.714, loss=56.129, backward_time=0.751, grad_norm=103.423, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.113, optim0_lr0=7.783e-05, train_time=1.997 +[gpua003:0/64] 2023-07-07 08:00:31,053 (trainer:732) INFO: 22epoch:train:6401-6500batch: iter_time=9.940e-05, forward_time=0.110, loss_ctc=61.632, loss_att=44.108, acc=0.695, loss=49.365, backward_time=0.752, grad_norm=86.270, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.113, optim0_lr0=7.781e-05, train_time=1.998 +[gpua003:0/64] 2023-07-07 08:02:10,665 (trainer:732) INFO: 22epoch:train:6501-6600batch: iter_time=1.027e-04, forward_time=0.109, loss_ctc=72.139, loss_att=51.933, acc=0.712, loss=57.995, backward_time=0.750, grad_norm=96.771, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.113, optim0_lr0=7.779e-05, train_time=1.992 +[gpua003:0/64] 2023-07-07 08:03:18,465 (multiple_iter_factory:32) INFO: Building 8th iter-factory... +[gpua003:0/64] 2023-07-07 08:03:37,874 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-07 08:03:41,509 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-07 08:03:41,509 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2, +[gpua003:0/64] 2023-07-07 08:03:41,515 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-07 08:07:00,984 (trainer:732) INFO: 22epoch:train:6601-6700batch: iter_time=1.390, forward_time=0.109, loss_ctc=70.332, loss_att=55.203, acc=0.706, loss=59.742, backward_time=0.765, grad_norm=97.451, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.113, optim0_lr0=7.777e-05, train_time=5.806 +[gpua003:0/64] 2023-07-07 08:08:45,065 (trainer:732) INFO: 22epoch:train:6701-6800batch: iter_time=9.209e-05, forward_time=0.108, loss_ctc=72.039, loss_att=50.836, acc=0.705, loss=57.197, backward_time=0.762, grad_norm=96.664, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.113, optim0_lr0=7.776e-05, train_time=2.081 +[gpua003:0/64] 2023-07-07 08:10:27,886 (trainer:732) INFO: 22epoch:train:6801-6900batch: iter_time=9.907e-05, forward_time=0.109, loss_ctc=66.153, loss_att=54.046, acc=0.709, loss=57.678, backward_time=0.754, grad_norm=91.951, clip=100.000, loss_scale=1.476e+20, 
optim_step_time=0.113, optim0_lr0=7.774e-05, train_time=2.056 +[gpua003:0/64] 2023-07-07 08:12:07,728 (trainer:732) INFO: 22epoch:train:6901-7000batch: iter_time=1.036e-04, forward_time=0.109, loss_ctc=76.287, loss_att=59.045, acc=0.713, loss=64.218, backward_time=0.752, grad_norm=110.971, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.113, optim0_lr0=7.772e-05, train_time=1.997 +[gpua003:0/64] 2023-07-07 08:13:52,415 (trainer:732) INFO: 22epoch:train:7001-7100batch: iter_time=9.646e-05, forward_time=0.110, loss_ctc=72.777, loss_att=62.041, acc=0.682, loss=65.261, backward_time=0.770, grad_norm=98.939, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.113, optim0_lr0=7.770e-05, train_time=2.094 +[gpua003:0/64] 2023-07-07 08:15:37,878 (trainer:732) INFO: 22epoch:train:7101-7200batch: iter_time=9.875e-05, forward_time=0.109, loss_ctc=69.184, loss_att=56.850, acc=0.686, loss=60.550, backward_time=0.767, grad_norm=104.102, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.113, optim0_lr0=7.768e-05, train_time=2.109 +[gpua003:0/64] 2023-07-07 08:17:18,027 (trainer:732) INFO: 22epoch:train:7201-7300batch: iter_time=9.466e-05, forward_time=0.108, loss_ctc=56.723, loss_att=44.561, acc=0.694, loss=48.210, backward_time=0.751, grad_norm=82.134, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.113, optim0_lr0=7.766e-05, train_time=2.003 +[gpua003:0/64] 2023-07-07 08:19:11,959 (trainer:732) INFO: 22epoch:train:7301-7400batch: iter_time=9.554e-05, forward_time=0.109, loss_ctc=68.916, loss_att=48.970, acc=0.708, loss=54.954, backward_time=0.805, grad_norm=145.408, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.113, optim0_lr0=7.764e-05, train_time=2.278 +[gpua003:0/64] 2023-07-07 08:20:53,519 (multiple_iter_factory:32) INFO: Building 9th iter-factory... 
+[gpua003:0/64] 2023-07-07 08:21:12,743 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-07 08:21:16,313 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-07 08:21:16,313 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9, +[gpua003:0/64] 2023-07-07 08:21:16,320 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-07 08:23:45,003 (trainer:732) INFO: 22epoch:train:7401-7500batch: iter_time=1.329, forward_time=0.136, loss_ctc=70.359, loss_att=50.850, acc=0.717, loss=56.703, backward_time=0.762, grad_norm=84.034, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.116, optim0_lr0=7.762e-05, train_time=5.461 +[gpua003:0/64] 2023-07-07 08:25:27,825 (trainer:732) INFO: 22epoch:train:7501-7600batch: iter_time=8.708e-05, forward_time=0.110, loss_ctc=76.765, loss_att=54.087, acc=0.711, loss=60.891, backward_time=0.763, grad_norm=98.850, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.113, optim0_lr0=7.761e-05, train_time=2.056 +[gpua003:0/64] 2023-07-07 08:27:08,334 (trainer:732) INFO: 22epoch:train:7601-7700batch: iter_time=1.042e-04, forward_time=0.109, loss_ctc=64.296, loss_att=50.883, acc=0.706, loss=54.907, backward_time=0.754, grad_norm=87.185, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.112, optim0_lr0=7.759e-05, train_time=2.010 +[gpua003:0/64] 2023-07-07 08:28:48,255 (trainer:732) INFO: 22epoch:train:7701-7800batch: iter_time=8.846e-05, forward_time=0.108, loss_ctc=74.565, loss_att=60.399, acc=0.719, loss=64.649, backward_time=0.752, grad_norm=95.857, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.113, optim0_lr0=7.757e-05, train_time=1.998 +[gpua003:0/64] 2023-07-07 08:30:28,045 (trainer:732) INFO: 22epoch:train:7801-7900batch: iter_time=1.037e-04, forward_time=0.109, loss_ctc=73.972, loss_att=62.893, acc=0.689, loss=66.217, backward_time=0.752, grad_norm=87.398, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.113, optim0_lr0=7.755e-05, train_time=1.996 +[gpua003:0/64] 2023-07-07 08:32:07,809 (trainer:732) INFO: 22epoch:train:7901-8000batch: iter_time=1.089e-04, forward_time=0.110, loss_ctc=70.642, loss_att=57.664, acc=0.693, loss=61.557, backward_time=0.752, grad_norm=92.959, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.113, optim0_lr0=7.753e-05, train_time=1.995 +[gpua003:0/64] 2023-07-07 08:33:47,492 (trainer:732) INFO: 22epoch:train:8001-8100batch: iter_time=1.063e-04, forward_time=0.109, loss_ctc=55.497, loss_att=44.132, acc=0.708, loss=47.541, backward_time=0.752, grad_norm=80.785, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.113, optim0_lr0=7.751e-05, train_time=1.993 +[gpua003:0/64] 2023-07-07 08:35:27,353 (trainer:732) INFO: 22epoch:train:8101-8200batch: iter_time=1.061e-04, forward_time=0.111, loss_ctc=67.689, loss_att=46.225, acc=0.704, loss=52.664, backward_time=0.752, grad_norm=103.286, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.113, optim0_lr0=7.749e-05, 
train_time=1.997 +[gpua003:0/64] 2023-07-07 08:37:07,180 (trainer:732) INFO: 22epoch:train:8201-8300batch: iter_time=1.080e-04, forward_time=0.111, loss_ctc=72.512, loss_att=53.632, acc=0.717, loss=59.296, backward_time=0.752, grad_norm=82.921, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.113, optim0_lr0=7.747e-05, train_time=1.996 +[gpua003:0/64] 2023-07-07 08:37:49,047 (multiple_iter_factory:32) INFO: Building 10th iter-factory... +[gpua003:0/64] 2023-07-07 08:38:08,430 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-07 08:38:12,343 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-07 08:38:12,343 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10, +[gpua003:0/64] 2023-07-07 08:38:12,349 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-07 08:43:42,647 (trainer:732) INFO: 22epoch:train:8301-8400batch: iter_time=2.865, forward_time=0.127, loss_ctc=76.998, loss_att=56.248, acc=0.715, loss=62.473, backward_time=0.764, grad_norm=91.921, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.113, optim0_lr0=7.746e-05, train_time=7.909 +[gpua003:0/64] 2023-07-07 08:45:23,857 (trainer:732) INFO: 22epoch:train:8401-8500batch: iter_time=9.852e-05, forward_time=0.109, loss_ctc=64.243, loss_att=47.232, acc=0.693, loss=52.335, backward_time=0.754, grad_norm=77.637, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.113, optim0_lr0=7.744e-05, train_time=2.024 +[gpua003:0/64] 2023-07-07 08:47:04,339 (trainer:732) INFO: 22epoch:train:8501-8600batch: iter_time=1.040e-04, forward_time=0.109, loss_ctc=74.869, loss_att=61.475, acc=0.714, loss=65.493, backward_time=0.753, grad_norm=93.438, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.112, optim0_lr0=7.742e-05, train_time=2.009 +[gpua003:0/64] 2023-07-07 08:48:43,954 (trainer:732) INFO: 22epoch:train:8601-8700batch: iter_time=1.034e-04, forward_time=0.108, loss_ctc=73.122, loss_att=61.953, acc=0.688, loss=65.304, backward_time=0.751, grad_norm=96.667, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.112, optim0_lr0=7.740e-05, train_time=1.992 +[gpua003:0/64] 2023-07-07 08:50:26,881 (trainer:732) INFO: 22epoch:train:8701-8800batch: iter_time=1.126e-04, forward_time=0.108, loss_ctc=66.145, loss_att=55.031, acc=0.694, loss=58.365, backward_time=0.754, grad_norm=94.273, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.112, optim0_lr0=7.738e-05, train_time=2.058 +[gpua003:0/64] 2023-07-07 08:52:06,631 (trainer:732) INFO: 22epoch:train:8801-8900batch: iter_time=1.047e-04, forward_time=0.108, loss_ctc=66.295, loss_att=51.963, acc=0.699, loss=56.262, backward_time=0.751, grad_norm=94.275, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.112, optim0_lr0=7.736e-05, train_time=1.995 +[gpua003:0/64] 2023-07-07 08:53:46,418 (trainer:732) INFO: 22epoch:train:8901-9000batch: iter_time=9.829e-05, forward_time=0.108, loss_ctc=61.909, loss_att=43.782, acc=0.697, 
loss=49.220, backward_time=0.752, grad_norm=82.673, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.113, optim0_lr0=7.734e-05, train_time=1.996 +[gpua003:0/64] 2023-07-07 08:55:26,132 (trainer:732) INFO: 22epoch:train:9001-9100batch: iter_time=1.043e-04, forward_time=0.108, loss_ctc=71.915, loss_att=53.753, acc=0.707, loss=59.202, backward_time=0.751, grad_norm=109.132, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.112, optim0_lr0=7.733e-05, train_time=1.994 +[gpua003:0/64] 2023-07-07 08:56:34,455 (multiple_iter_factory:32) INFO: Building 11th iter-factory... +[gpua003:0/64] 2023-07-07 08:56:53,892 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-07 08:56:57,546 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-07 08:56:57,546 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7, +[gpua003:0/64] 2023-07-07 08:56:57,553 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-07 09:00:31,061 (trainer:732) INFO: 22epoch:train:9101-9200batch: iter_time=1.431, forward_time=0.127, loss_ctc=71.039, loss_att=54.915, acc=0.705, loss=59.752, backward_time=0.761, grad_norm=106.663, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.113, optim0_lr0=7.731e-05, train_time=6.098 +[gpua003:0/64] 2023-07-07 09:02:14,178 (trainer:732) INFO: 22epoch:train:9201-9300batch: iter_time=1.042e-04, forward_time=0.122, loss_ctc=72.597, loss_att=52.275, acc=0.703, loss=58.372, backward_time=0.760, grad_norm=105.215, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.115, optim0_lr0=7.729e-05, train_time=2.062 +[gpua003:0/64] 2023-07-07 09:03:55,492 (trainer:732) INFO: 22epoch:train:9301-9400batch: iter_time=1.019e-04, forward_time=0.111, loss_ctc=66.483, loss_att=52.418, acc=0.717, loss=56.638, backward_time=0.755, grad_norm=102.064, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.114, optim0_lr0=7.727e-05, train_time=2.026 +[gpua003:0/64] 2023-07-07 09:05:35,862 (trainer:732) INFO: 22epoch:train:9401-9500batch: iter_time=1.021e-04, forward_time=0.110, loss_ctc=75.311, loss_att=58.987, acc=0.722, loss=63.884, backward_time=0.752, grad_norm=84.201, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.114, optim0_lr0=7.725e-05, train_time=2.007 +[gpua003:0/64] 2023-07-07 09:07:16,074 (trainer:732) INFO: 22epoch:train:9501-9600batch: iter_time=9.991e-05, forward_time=0.111, loss_ctc=71.239, loss_att=61.029, acc=0.693, loss=64.092, backward_time=0.754, grad_norm=112.479, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.113, optim0_lr0=7.723e-05, train_time=2.004 +[gpua003:0/64] 2023-07-07 09:08:55,956 (trainer:732) INFO: 22epoch:train:9601-9700batch: iter_time=1.011e-04, forward_time=0.111, loss_ctc=69.983, loss_att=56.365, acc=0.703, loss=60.450, backward_time=0.753, grad_norm=101.191, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.113, optim0_lr0=7.722e-05, train_time=1.997 +[gpua003:0/64] 2023-07-07 09:10:39,207 
(trainer:732) INFO: 22epoch:train:9701-9800batch: iter_time=1.137e-04, forward_time=0.129, loss_ctc=55.743, loss_att=43.049, acc=0.703, loss=46.857, backward_time=0.756, grad_norm=90.349, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.124, optim0_lr0=7.720e-05, train_time=2.065 +[gpua003:0/64] 2023-07-07 09:12:19,264 (trainer:732) INFO: 22epoch:train:9801-9900batch: iter_time=9.062e-05, forward_time=0.110, loss_ctc=69.518, loss_att=49.134, acc=0.711, loss=55.249, backward_time=0.753, grad_norm=100.063, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.113, optim0_lr0=7.718e-05, train_time=2.001 +[gpua003:0/64] 2023-07-07 09:14:01,810 (trainer:732) INFO: 22epoch:train:9901-10000batch: iter_time=9.648e-05, forward_time=0.129, loss_ctc=69.398, loss_att=49.229, acc=0.721, loss=55.280, backward_time=0.756, grad_norm=90.245, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.113, optim0_lr0=7.716e-05, train_time=2.051 +[gpua003:0/64] 2023-07-07 09:26:51,892 (trainer:338) INFO: 22epoch results: [train] iter_time=0.222, forward_time=0.112, loss_ctc=70.052, loss_att=54.212, acc=0.701, loss=58.964, backward_time=0.757, grad_norm=96.262, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.113, optim0_lr0=7.809e-05, train_time=2.580, time=3 hours, 35 minutes and 15.27 seconds, total_count=190000, gpu_max_cached_mem_GB=37.779, [valid] loss_ctc=53.056, cer_ctc=0.291, loss_att=42.969, acc=0.658, cer=0.388, wer=0.991, loss=45.995, time=6 minutes and 5.25 seconds, total_count=19734, gpu_max_cached_mem_GB=37.779, [att_plot] time=6 minutes and 31.11 seconds, total_count=0, gpu_max_cached_mem_GB=37.779 +[gpua003:0/64] 2023-07-07 09:27:11,470 (trainer:386) INFO: The best model has been updated: valid.total_count +[gpua003:0/64] 2023-07-07 09:27:11,478 (trainer:272) INFO: 23/100epoch started. Estimated time to finish: 1 week, 5 days and 13 hours +[gpua003:0/64] 2023-07-07 09:27:12,495 (multiple_iter_factory:32) INFO: Building 0th iter-factory... 
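The "Building Nth iter-factory..." records before and after this point come from the multiple-iterator training mode: the training set was pre-split into 12 shards (splits12/split.1 ... split.12), and each epoch walks the shards one at a time, rebuilding the dataset and an UnsortedBatchSampler (batch_size=128, N-batch=37994) for the current shard only, so that only one shard's key list needs to be resident at a time. A minimal sketch of that shard-cycling pattern, with illustrative names throughout (this is not ESPnet's actual implementation):

    import random

    def multiple_iter_factory(split_ids, epoch, batch_size=128):
        # Shuffle the shard order deterministically for this epoch, then
        # stream fixed-size mini-batches from one shard at a time.
        order = list(split_ids)
        random.Random(epoch).shuffle(order)
        for split_id in order:
            # Stand-in for reading a speech_shape/split.N key file.
            keys = [f"utt-{split_id}-{i}" for i in range(1024)]
            for start in range(0, len(keys), batch_size):
                yield split_id, keys[start:start + batch_size]

    # Example: epoch 23 cycling over 12 shards, as in the records here.
    for split_id, batch in multiple_iter_factory(range(1, 13), epoch=23):
        pass  # each batch is one list of up to 128 utterance ids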
+[gpua003:0/64] 2023-07-07 09:27:32,952 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-07 09:27:36,862 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-07 09:27:36,865 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1, +[gpua003:0/64] 2023-07-07 09:27:36,955 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-07 09:34:04,979 (trainer:732) INFO: 23epoch:train:1-100batch: iter_time=3.070, forward_time=0.136, loss_ctc=73.179, loss_att=57.756, acc=0.704, loss=62.383, backward_time=0.766, grad_norm=99.480, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.115, optim0_lr0=7.714e-05, train_time=8.259 +[gpua003:0/64] 2023-07-07 09:35:47,590 (trainer:732) INFO: 23epoch:train:101-200batch: iter_time=9.809e-05, forward_time=0.110, loss_ctc=65.447, loss_att=55.548, acc=0.684, loss=58.518, backward_time=0.757, grad_norm=105.042, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.113, optim0_lr0=7.712e-05, train_time=2.052 +[gpua003:0/64] 2023-07-07 09:37:44,269 (trainer:732) INFO: 23epoch:train:201-300batch: iter_time=2.992e-04, forward_time=0.200, loss_ctc=91.477, loss_att=64.962, acc=0.703, loss=72.917, backward_time=0.765, grad_norm=137.116, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.130, optim0_lr0=7.711e-05, train_time=2.331 +[gpua003:0/64] 2023-07-07 09:39:30,373 (trainer:732) INFO: 23epoch:train:301-400batch: iter_time=2.111e-04, forward_time=0.143, loss_ctc=74.075, loss_att=60.821, acc=0.698, loss=64.797, backward_time=0.764, grad_norm=100.292, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.118, optim0_lr0=7.709e-05, train_time=2.124 +[gpua003:0/64] 2023-07-07 09:41:13,338 (trainer:732) INFO: 23epoch:train:401-500batch: iter_time=9.774e-05, forward_time=0.108, loss_ctc=78.934, loss_att=61.524, acc=0.709, loss=66.747, backward_time=0.755, grad_norm=111.165, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.113, optim0_lr0=7.707e-05, train_time=2.059 +[gpua003:0/64] 2023-07-07 09:42:53,514 (trainer:732) INFO: 23epoch:train:501-600batch: iter_time=9.807e-05, forward_time=0.108, loss_ctc=69.498, loss_att=53.920, acc=0.698, loss=58.594, backward_time=0.752, grad_norm=97.973, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.113, optim0_lr0=7.705e-05, train_time=2.003 +[gpua003:0/64] 2023-07-07 09:44:41,445 (trainer:732) INFO: 23epoch:train:601-700batch: iter_time=8.987e-05, forward_time=0.108, loss_ctc=83.563, loss_att=61.774, acc=0.691, loss=68.311, backward_time=0.762, grad_norm=122.324, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.113, optim0_lr0=7.703e-05, train_time=2.158 +[gpua003:0/64] 2023-07-07 09:46:26,252 (trainer:732) INFO: 23epoch:train:701-800batch: iter_time=9.492e-05, forward_time=0.109, loss_ctc=74.540, loss_att=56.661, acc=0.697, loss=62.025, backward_time=0.755, grad_norm=102.834, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.113, optim0_lr0=7.701e-05, 
train_time=2.096 +[gpua003:0/64] 2023-07-07 09:47:10,710 (multiple_iter_factory:32) INFO: Building 1th iter-factory... +[gpua003:0/64] 2023-07-07 09:47:30,186 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-07 09:47:34,091 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-07 09:47:34,145 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11, +[gpua003:0/64] 2023-07-07 09:47:34,151 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-07 09:53:24,106 (trainer:732) INFO: 23epoch:train:801-900batch: iter_time=2.968, forward_time=0.137, loss_ctc=71.883, loss_att=53.675, acc=0.701, loss=59.138, backward_time=0.769, grad_norm=96.823, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.113, optim0_lr0=7.700e-05, train_time=8.356 +[gpua003:0/64] 2023-07-07 09:55:05,103 (trainer:732) INFO: 23epoch:train:901-1000batch: iter_time=1.047e-04, forward_time=0.110, loss_ctc=64.340, loss_att=51.496, acc=0.693, loss=55.349, backward_time=0.754, grad_norm=93.049, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.113, optim0_lr0=7.698e-05, train_time=2.020 +[gpua003:0/64] 2023-07-07 09:56:45,333 (trainer:732) INFO: 23epoch:train:1001-1100batch: iter_time=9.897e-05, forward_time=0.109, loss_ctc=82.590, loss_att=64.984, acc=0.698, loss=70.266, backward_time=0.751, grad_norm=115.985, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.113, optim0_lr0=7.696e-05, train_time=2.004 +[gpua003:0/64] 2023-07-07 09:58:25,170 (trainer:732) INFO: 23epoch:train:1101-1200batch: iter_time=9.800e-05, forward_time=0.109, loss_ctc=74.904, loss_att=57.611, acc=0.704, loss=62.799, backward_time=0.753, grad_norm=93.699, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.113, optim0_lr0=7.694e-05, train_time=1.997 +[gpua003:0/64] 2023-07-07 10:00:05,128 (trainer:732) INFO: 23epoch:train:1201-1300batch: iter_time=1.040e-04, forward_time=0.110, loss_ctc=77.700, loss_att=63.938, acc=0.709, loss=68.066, backward_time=0.753, grad_norm=120.473, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.114, optim0_lr0=7.692e-05, train_time=1.999 +[gpua003:0/64] 2023-07-07 10:01:45,015 (trainer:732) INFO: 23epoch:train:1301-1400batch: iter_time=1.311e-04, forward_time=0.110, loss_ctc=66.958, loss_att=50.399, acc=0.716, loss=55.367, backward_time=0.753, grad_norm=104.203, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.113, optim0_lr0=7.690e-05, train_time=1.998 +[gpua003:0/64] 2023-07-07 10:03:25,092 (trainer:732) INFO: 23epoch:train:1401-1500batch: iter_time=1.131e-04, forward_time=0.111, loss_ctc=79.489, loss_att=60.622, acc=0.688, loss=66.282, backward_time=0.753, grad_norm=110.078, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.113, optim0_lr0=7.689e-05, train_time=2.001 +[gpua003:0/64] 2023-07-07 10:05:04,751 (trainer:732) INFO: 23epoch:train:1501-1600batch: iter_time=9.681e-05, forward_time=0.109, loss_ctc=69.851, loss_att=56.849, acc=0.699, 
loss=60.749, backward_time=0.751, grad_norm=108.192, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.113, optim0_lr0=7.687e-05, train_time=1.993 +[gpua003:0/64] 2023-07-07 10:06:13,927 (multiple_iter_factory:32) INFO: Building 2th iter-factory... +[gpua003:0/64] 2023-07-07 10:06:33,050 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-07 10:06:36,687 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-07 10:06:36,687 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9, +[gpua003:0/64] 2023-07-07 10:06:36,693 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-07 10:10:24,633 (trainer:732) INFO: 23epoch:train:1601-1700batch: iter_time=1.378, forward_time=0.109, loss_ctc=70.081, loss_att=51.925, acc=0.700, loss=57.372, backward_time=0.765, grad_norm=91.494, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.113, optim0_lr0=7.685e-05, train_time=6.397 +[gpua003:0/64] 2023-07-07 10:12:05,102 (trainer:732) INFO: 23epoch:train:1701-1800batch: iter_time=1.008e-04, forward_time=0.109, loss_ctc=70.081, loss_att=57.940, acc=0.707, loss=61.582, backward_time=0.756, grad_norm=100.076, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.112, optim0_lr0=7.683e-05, train_time=2.009 +[gpua003:0/64] 2023-07-07 10:13:45,172 (trainer:732) INFO: 23epoch:train:1801-1900batch: iter_time=1.097e-04, forward_time=0.108, loss_ctc=72.888, loss_att=55.204, acc=0.697, loss=60.509, backward_time=0.752, grad_norm=114.953, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.112, optim0_lr0=7.681e-05, train_time=2.001 +[gpua003:0/64] 2023-07-07 10:15:25,082 (trainer:732) INFO: 23epoch:train:1901-2000batch: iter_time=9.785e-05, forward_time=0.109, loss_ctc=83.550, loss_att=66.001, acc=0.695, loss=71.265, backward_time=0.753, grad_norm=99.143, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.113, optim0_lr0=7.680e-05, train_time=1.998 +[gpua003:0/64] 2023-07-07 10:17:04,949 (trainer:732) INFO: 23epoch:train:2001-2100batch: iter_time=9.886e-05, forward_time=0.110, loss_ctc=72.138, loss_att=59.700, acc=0.709, loss=63.431, backward_time=0.752, grad_norm=90.082, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.112, optim0_lr0=7.678e-05, train_time=1.997 +[gpua003:0/64] 2023-07-07 10:18:44,752 (trainer:732) INFO: 23epoch:train:2101-2200batch: iter_time=9.873e-05, forward_time=0.108, loss_ctc=71.072, loss_att=54.294, acc=0.707, loss=59.327, backward_time=0.752, grad_norm=109.276, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.112, optim0_lr0=7.676e-05, train_time=1.996 +[gpua003:0/64] 2023-07-07 10:20:24,385 (trainer:732) INFO: 23epoch:train:2201-2300batch: iter_time=9.689e-05, forward_time=0.107, loss_ctc=78.773, loss_att=58.998, acc=0.697, loss=64.930, backward_time=0.751, grad_norm=108.006, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.112, optim0_lr0=7.674e-05, train_time=1.992 +[gpua003:0/64] 2023-07-07 10:22:04,941 (trainer:732) 
INFO: 23epoch:train:2301-2400batch: iter_time=9.118e-05, forward_time=0.108, loss_ctc=74.926, loss_att=57.996, acc=0.700, loss=63.075, backward_time=0.752, grad_norm=123.642, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.113, optim0_lr0=7.672e-05, train_time=2.011 +[gpua003:0/64] 2023-07-07 10:23:49,235 (trainer:732) INFO: 23epoch:train:2401-2500batch: iter_time=9.225e-05, forward_time=0.107, loss_ctc=64.708, loss_att=53.380, acc=0.695, loss=56.779, backward_time=0.763, grad_norm=88.049, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.113, optim0_lr0=7.670e-05, train_time=2.086 +[gpua003:0/64] 2023-07-07 10:23:52,704 (multiple_iter_factory:32) INFO: Building 3th iter-factory... +[gpua003:0/64] 2023-07-07 10:24:12,052 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-07 10:24:15,673 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-07 10:24:15,673 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5, +[gpua003:0/64] 2023-07-07 10:24:15,680 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-07 10:29:09,460 (trainer:732) INFO: 23epoch:train:2501-2600batch: iter_time=1.283, forward_time=0.109, loss_ctc=73.117, loss_att=58.099, acc=0.706, loss=62.605, backward_time=0.766, grad_norm=90.498, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.112, optim0_lr0=7.669e-05, train_time=6.404 +[gpua003:0/64] 2023-07-07 10:30:49,559 (trainer:732) INFO: 23epoch:train:2601-2700batch: iter_time=8.534e-05, forward_time=0.109, loss_ctc=64.315, loss_att=53.246, acc=0.691, loss=56.567, backward_time=0.752, grad_norm=89.794, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.113, optim0_lr0=7.667e-05, train_time=2.002 +[gpua003:0/64] 2023-07-07 10:32:29,624 (trainer:732) INFO: 23epoch:train:2701-2800batch: iter_time=8.962e-05, forward_time=0.109, loss_ctc=85.891, loss_att=63.051, acc=0.705, loss=69.903, backward_time=0.754, grad_norm=94.837, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.113, optim0_lr0=7.665e-05, train_time=2.001 +[gpua003:0/64] 2023-07-07 10:34:09,982 (trainer:732) INFO: 23epoch:train:2801-2900batch: iter_time=9.237e-05, forward_time=0.109, loss_ctc=74.116, loss_att=59.956, acc=0.701, loss=64.204, backward_time=0.753, grad_norm=89.065, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.113, optim0_lr0=7.663e-05, train_time=2.007 +[gpua003:0/64] 2023-07-07 10:35:50,367 (trainer:732) INFO: 23epoch:train:2901-3000batch: iter_time=8.691e-05, forward_time=0.109, loss_ctc=79.211, loss_att=61.574, acc=0.710, loss=66.865, backward_time=0.753, grad_norm=102.506, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.113, optim0_lr0=7.661e-05, train_time=2.007 +[gpua003:0/64] 2023-07-07 10:37:31,411 (trainer:732) INFO: 23epoch:train:3001-3100batch: iter_time=9.464e-05, forward_time=0.109, loss_ctc=65.890, loss_att=51.091, acc=0.706, loss=55.530, backward_time=0.755, grad_norm=86.526, clip=100.000, 
loss_scale=5.903e+20, optim_step_time=0.113, optim0_lr0=7.660e-05, train_time=2.021 +[gpua003:0/64] 2023-07-07 10:39:12,463 (trainer:732) INFO: 23epoch:train:3101-3200batch: iter_time=1.047e-04, forward_time=0.109, loss_ctc=78.191, loss_att=58.913, acc=0.696, loss=64.697, backward_time=0.758, grad_norm=107.870, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.113, optim0_lr0=7.658e-05, train_time=2.021 +[gpua003:0/64] 2023-07-07 10:40:55,933 (trainer:732) INFO: 23epoch:train:3201-3300batch: iter_time=1.031e-04, forward_time=0.108, loss_ctc=70.556, loss_att=54.398, acc=0.709, loss=59.245, backward_time=0.758, grad_norm=88.177, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.113, optim0_lr0=7.656e-05, train_time=2.069 +[gpua003:0/64] 2023-07-07 10:41:31,292 (multiple_iter_factory:32) INFO: Building 4th iter-factory... +[gpua003:0/64] 2023-07-07 10:41:50,605 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-07 10:41:54,287 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-07 10:41:54,287 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7, +[gpua003:0/64] 2023-07-07 10:41:54,293 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-07 10:45:45,530 (trainer:732) INFO: 23epoch:train:3301-3400batch: iter_time=1.299, forward_time=0.108, loss_ctc=67.705, loss_att=53.054, acc=0.697, loss=57.449, backward_time=0.769, grad_norm=90.741, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.113, optim0_lr0=7.654e-05, train_time=5.792 +[gpua003:0/64] 2023-07-07 10:47:25,708 (trainer:732) INFO: 23epoch:train:3401-3500batch: iter_time=9.948e-05, forward_time=0.108, loss_ctc=69.513, loss_att=54.398, acc=0.710, loss=58.932, backward_time=0.753, grad_norm=90.557, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.112, optim0_lr0=7.653e-05, train_time=2.003 +[gpua003:0/64] 2023-07-07 10:49:05,710 (trainer:732) INFO: 23epoch:train:3501-3600batch: iter_time=9.595e-05, forward_time=0.109, loss_ctc=71.035, loss_att=55.784, acc=0.705, loss=60.359, backward_time=0.754, grad_norm=89.579, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.112, optim0_lr0=7.651e-05, train_time=2.000 +[gpua003:0/64] 2023-07-07 10:51:01,341 (trainer:732) INFO: 23epoch:train:3601-3700batch: iter_time=9.044e-05, forward_time=0.119, loss_ctc=84.230, loss_att=63.766, acc=0.703, loss=69.905, backward_time=0.776, grad_norm=98.539, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.114, optim0_lr0=7.649e-05, train_time=2.312 +[gpua003:0/64] 2023-07-07 10:52:56,875 (trainer:732) INFO: 23epoch:train:3701-3800batch: iter_time=6.396e-04, forward_time=0.131, loss_ctc=73.263, loss_att=59.816, acc=0.713, loss=63.850, backward_time=0.792, grad_norm=100.049, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.115, optim0_lr0=7.647e-05, train_time=2.310 +[gpua003:0/64] 2023-07-07 10:54:36,698 (trainer:732) INFO: 23epoch:train:3801-3900batch: iter_time=8.669e-05, 
forward_time=0.110, loss_ctc=66.313, loss_att=49.529, acc=0.713, loss=54.564, backward_time=0.753, grad_norm=92.675, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.113, optim0_lr0=7.645e-05, train_time=1.996 +[gpua003:0/64] 2023-07-07 10:56:17,417 (trainer:732) INFO: 23epoch:train:3901-4000batch: iter_time=9.704e-05, forward_time=0.112, loss_ctc=77.178, loss_att=56.536, acc=0.708, loss=62.729, backward_time=0.754, grad_norm=103.834, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.113, optim0_lr0=7.644e-05, train_time=2.014 +[gpua003:0/64] 2023-07-07 10:57:57,769 (trainer:732) INFO: 23epoch:train:4001-4100batch: iter_time=9.868e-05, forward_time=0.110, loss_ctc=72.979, loss_att=57.846, acc=0.699, loss=62.386, backward_time=0.753, grad_norm=115.524, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.113, optim0_lr0=7.642e-05, train_time=2.007 +[gpua003:0/64] 2023-07-07 10:59:19,759 (multiple_iter_factory:32) INFO: Building 5th iter-factory... +[gpua003:0/64] 2023-07-07 10:59:39,162 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-07 10:59:42,862 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-07 10:59:42,862 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10, +[gpua003:0/64] 2023-07-07 10:59:42,869 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-07 11:04:06,757 (trainer:732) INFO: 23epoch:train:4101-4200batch: iter_time=2.609, forward_time=0.130, loss_ctc=65.738, loss_att=48.149, acc=0.704, loss=53.426, backward_time=0.768, grad_norm=87.940, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.115, optim0_lr0=7.640e-05, train_time=7.379 +[gpua003:0/64] 2023-07-07 11:05:49,241 (trainer:732) INFO: 23epoch:train:4201-4300batch: iter_time=1.057e-04, forward_time=0.109, loss_ctc=68.915, loss_att=58.433, acc=0.703, loss=61.578, backward_time=0.757, grad_norm=94.575, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.113, optim0_lr0=7.638e-05, train_time=2.050 +[gpua003:0/64] 2023-07-07 11:07:30,444 (trainer:732) INFO: 23epoch:train:4301-4400batch: iter_time=1.040e-04, forward_time=0.111, loss_ctc=71.161, loss_att=54.889, acc=0.690, loss=59.771, backward_time=0.754, grad_norm=100.822, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.114, optim0_lr0=7.636e-05, train_time=2.024 +[gpua003:0/64] 2023-07-07 11:09:10,593 (trainer:732) INFO: 23epoch:train:4401-4500batch: iter_time=1.108e-04, forward_time=0.110, loss_ctc=82.303, loss_att=66.740, acc=0.686, loss=71.409, backward_time=0.755, grad_norm=94.738, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.113, optim0_lr0=7.635e-05, train_time=2.003 +[gpua003:0/64] 2023-07-07 11:10:50,539 (trainer:732) INFO: 23epoch:train:4501-4600batch: iter_time=1.004e-04, forward_time=0.110, loss_ctc=71.919, loss_att=59.048, acc=0.707, loss=62.910, backward_time=0.753, grad_norm=85.604, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.112, optim0_lr0=7.633e-05, 
train_time=1.999 +[gpua003:0/64] 2023-07-07 11:12:30,296 (trainer:732) INFO: 23epoch:train:4601-4700batch: iter_time=1.109e-04, forward_time=0.110, loss_ctc=70.547, loss_att=55.273, acc=0.703, loss=59.856, backward_time=0.752, grad_norm=92.197, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.113, optim0_lr0=7.631e-05, train_time=1.995 +[gpua003:0/64] 2023-07-07 11:14:10,064 (trainer:732) INFO: 23epoch:train:4701-4800batch: iter_time=9.679e-05, forward_time=0.109, loss_ctc=78.198, loss_att=57.584, acc=0.701, loss=63.768, backward_time=0.754, grad_norm=100.076, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.114, optim0_lr0=7.629e-05, train_time=1.995 +[gpua003:0/64] 2023-07-07 11:15:50,073 (trainer:732) INFO: 23epoch:train:4801-4900batch: iter_time=1.089e-04, forward_time=0.110, loss_ctc=73.676, loss_att=58.368, acc=0.689, loss=62.960, backward_time=0.754, grad_norm=105.898, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.114, optim0_lr0=7.628e-05, train_time=2.000 +[gpua003:0/64] 2023-07-07 11:17:33,946 (trainer:732) INFO: 23epoch:train:4901-5000batch: iter_time=9.428e-05, forward_time=0.109, loss_ctc=64.333, loss_att=54.483, acc=0.690, loss=57.438, backward_time=0.757, grad_norm=94.071, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.114, optim0_lr0=7.626e-05, train_time=2.077 +[gpua003:0/64] 2023-07-07 11:17:39,457 (multiple_iter_factory:32) INFO: Building 6th iter-factory... +[gpua003:0/64] 2023-07-07 11:17:59,004 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-07 11:18:02,629 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-07 11:18:02,629 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6, +[gpua003:0/64] 2023-07-07 11:18:02,635 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-07 11:23:09,216 (trainer:732) INFO: 23epoch:train:5001-5100batch: iter_time=1.440, forward_time=0.127, loss_ctc=71.836, loss_att=56.120, acc=0.706, loss=60.835, backward_time=0.765, grad_norm=102.820, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.114, optim0_lr0=7.624e-05, train_time=6.705 +[gpua003:0/64] 2023-07-07 11:24:49,430 (trainer:732) INFO: 23epoch:train:5101-5200batch: iter_time=9.982e-05, forward_time=0.108, loss_ctc=63.033, loss_att=52.578, acc=0.690, loss=55.715, backward_time=0.754, grad_norm=77.578, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.112, optim0_lr0=7.622e-05, train_time=2.004 +gpua087:2330954:2332476 [1] NCCL INFO comm 0xbc380f30 rank 53 nranks 64 cudaDev 1 busId 46000 - Abort COMPLETE +gpua055:3866105:3867680 [2] NCCL INFO comm 0xa0bacc0 rank 38 nranks 64 cudaDev 2 busId 85000 - Abort COMPLETE +gpua087:2330955:2332481 [2] NCCL INFO comm 0x1091ecd0 rank 54 nranks 64 cudaDev 2 busId 85000 - Abort COMPLETE +gpua031:1680702:1682220 [2] NCCL INFO comm 0x90042a50 rank 26 nranks 64 cudaDev 2 busId 85000 - Abort COMPLETE +gpua055:3866104:3867675 [1] NCCL INFO comm 0x4ff24650 rank 37 
nranks 64 cudaDev 1 busId 46000 - Abort COMPLETE +gpua028:3269322:3270845 [1] NCCL INFO comm 0x50ff9ba0 rank 17 nranks 64 cudaDev 1 busId 46000 - Abort COMPLETE +gpua025:63838:65355 [2] NCCL INFO comm 0xc1f876b0 rank 14 nranks 64 cudaDev 2 busId 85000 - Abort COMPLETE +gpua060:2854969:2856486 [1] NCCL INFO comm 0x8c2cb6d0 rank 45 nranks 64 cudaDev 1 busId 46000 - Abort COMPLETE +gpua025:63837:65357 [1] NCCL INFO comm 0xa196ac90 rank 13 nranks 64 cudaDev 1 busId 46000 - Abort COMPLETE +gpua060:2854970:2856496 [2] NCCL INFO comm 0xb4b68d30 rank 46 nranks 64 cudaDev 2 busId 85000 - Abort COMPLETE +gpua003:350635:352158 [2] NCCL INFO comm 0xc165ff50 rank 2 nranks 64 cudaDev 2 busId 85000 - Abort COMPLETE +gpua003:350634:352156 [1] NCCL INFO comm 0xb8217e10 rank 1 nranks 64 cudaDev 1 busId 46000 - Abort COMPLETE +gpua028:3269323:3270853 [2] NCCL INFO comm 0x4fe1d010 rank 18 nranks 64 cudaDev 2 busId 85000 - Abort COMPLETE +gpua053:959076:960598 [2] NCCL INFO comm 0xa5547430 rank 34 nranks 64 cudaDev 2 busId 85000 - Abort COMPLETE +gpua090:2294099:2295633 [2] NCCL INFO comm 0x508070c0 rank 58 nranks 64 cudaDev 2 busId 85000 - Abort COMPLETE +gpua029:1226922:1228446 [1] NCCL INFO comm 0x91446d0 rank 21 nranks 64 cudaDev 1 busId 46000 - Abort COMPLETE +gpua029:1226923:1228448 [2] NCCL INFO comm 0x9682050 rank 22 nranks 64 cudaDev 2 busId 85000 - Abort COMPLETE +gpua057:1814426:1815949 [1] NCCL INFO comm 0xb6887810 rank 41 nranks 64 cudaDev 1 busId 46000 - Abort COMPLETE +gpua057:1814427:1815959 [2] NCCL INFO comm 0x8ff8bf0 rank 42 nranks 64 cudaDev 2 busId 85000 - Abort COMPLETE +gpua035:1685218:1686747 [2] NCCL INFO comm 0x5149e590 rank 30 nranks 64 cudaDev 2 busId 85000 - Abort COMPLETE +gpua035:1685217:1686742 [1] NCCL INFO comm 0x94073350 rank 29 nranks 64 cudaDev 1 busId 46000 - Abort COMPLETE +gpua098:2101209:2102740 [1] NCCL INFO comm 0xb77452f0 rank 61 nranks 64 cudaDev 1 busId 46000 - Abort COMPLETE +gpua005:322786:324303 [1] NCCL INFO comm 0x9e527b50 rank 5 nranks 64 cudaDev 1 busId 46000 - Abort COMPLETE +gpua005:322787:324304 [2] NCCL INFO comm 0xa671d450 rank 6 nranks 64 cudaDev 2 busId 85000 - Abort COMPLETE +gpua098:2101210:2102744 [2] NCCL INFO comm 0xb13e4b0 rank 62 nranks 64 cudaDev 2 busId 85000 - Abort COMPLETE +gpua010:1622002:1623518 [2] NCCL INFO comm 0x95597d0 rank 10 nranks 64 cudaDev 2 busId 85000 - Abort COMPLETE +gpua074:989793:991318 [2] NCCL INFO comm 0x50124340 rank 50 nranks 64 cudaDev 2 busId 85000 - Abort COMPLETE +Process SpawnProcess-2: +gpua090:2294098:2295630 [1] NCCL INFO comm 0xb9291470 rank 57 nranks 64 cudaDev 1 busId 46000 - Abort COMPLETE +gpua031:1680701:1682217 [1] NCCL INFO comm 0xb74170b0 rank 25 nranks 64 cudaDev 1 busId 46000 - Abort COMPLETE +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 630, in train_one_epoch + scaler.scale(loss).backward() + File 
"/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/_tensor.py", line 488, in backward + torch.autograd.backward( + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/autograd/__init__.py", line 197, in backward + Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass +RuntimeError: [Rank 45] Caught collective operation timeout: WorkNCCL(SeqNum=9412683, OpType=ALLREDUCE, TensorShape=[1023], Timeout(ms)=1800000) ran for 1804271 milliseconds before timing out. +gpua053:959075:960591 [1] NCCL INFO comm 0x50f9bf70 rank 33 nranks 64 cudaDev 1 busId 46000 - Abort COMPLETE +Process SpawnProcess-2: +Process SpawnProcess-2: +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 630, in train_one_epoch + scaler.scale(loss).backward() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/_tensor.py", line 488, in backward + torch.autograd.backward( + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/autograd/__init__.py", line 197, in backward + Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass +RuntimeError: [Rank 21] Caught collective operation timeout: WorkNCCL(SeqNum=9412683, OpType=ALLREDUCE, TensorShape=[1023], Timeout(ms)=1800000) ran for 1804411 milliseconds before timing out. 
+Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 630, in train_one_epoch + scaler.scale(loss).backward() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/_tensor.py", line 488, in backward + torch.autograd.backward( + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/autograd/__init__.py", line 197, in backward + Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass +RuntimeError: [Rank 17] Caught collective operation timeout: WorkNCCL(SeqNum=9412683, OpType=ALLREDUCE, TensorShape=[1023], Timeout(ms)=1800000) ran for 1804302 milliseconds before timing out. +Process SpawnProcess-3: +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 630, in train_one_epoch + scaler.scale(loss).backward() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/_tensor.py", line 488, in backward + torch.autograd.backward( + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/autograd/__init__.py", line 197, in backward + Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass +RuntimeError: [Rank 14] Caught collective operation timeout: WorkNCCL(SeqNum=9412683, OpType=ALLREDUCE, TensorShape=[1023], Timeout(ms)=1800000) ran for 1804276 milliseconds before timing out. 
+Process SpawnProcess-2: +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 630, in train_one_epoch + scaler.scale(loss).backward() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/_tensor.py", line 488, in backward + torch.autograd.backward( + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/autograd/__init__.py", line 197, in backward + Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass +RuntimeError: [Rank 1] Caught collective operation timeout: WorkNCCL(SeqNum=9412683, OpType=ALLREDUCE, TensorShape=[1023], Timeout(ms)=1800000) ran for 1804307 milliseconds before timing out. +Process SpawnProcess-2: +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 630, in train_one_epoch + scaler.scale(loss).backward() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/_tensor.py", line 488, in backward + torch.autograd.backward( + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/autograd/__init__.py", line 197, in backward + Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass +RuntimeError: [Rank 53] Caught collective operation timeout: WorkNCCL(SeqNum=9412683, OpType=ALLREDUCE, TensorShape=[1023], Timeout(ms)=1800000) ran for 1804259 milliseconds before timing out. 
+Process SpawnProcess-3: +Process SpawnProcess-3: +Traceback (most recent call last): +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 630, in train_one_epoch + scaler.scale(loss).backward() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/_tensor.py", line 488, in backward + torch.autograd.backward( + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/autograd/__init__.py", line 197, in backward + Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 630, in train_one_epoch + scaler.scale(loss).backward() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/_tensor.py", line 488, in backward + torch.autograd.backward( +RuntimeError: [Rank 2] Caught collective operation timeout: WorkNCCL(SeqNum=9412683, OpType=ALLREDUCE, TensorShape=[1023], Timeout(ms)=1800000) ran for 1804368 milliseconds before timing out. + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/autograd/__init__.py", line 197, in backward + Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass +RuntimeError: [Rank 46] Caught collective operation timeout: WorkNCCL(SeqNum=9412683, OpType=ALLREDUCE, TensorShape=[1023], Timeout(ms)=1800000) ran for 1804297 milliseconds before timing out. 
+Process SpawnProcess-2: +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 630, in train_one_epoch + scaler.scale(loss).backward() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/_tensor.py", line 488, in backward + torch.autograd.backward( + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/autograd/__init__.py", line 197, in backward + Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass +RuntimeError: [Rank 37] Caught collective operation timeout: WorkNCCL(SeqNum=9412683, OpType=ALLREDUCE, TensorShape=[1023], Timeout(ms)=1800000) ran for 1804277 milliseconds before timing out. +Process SpawnProcess-2: +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 630, in train_one_epoch + scaler.scale(loss).backward() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/_tensor.py", line 488, in backward + torch.autograd.backward( + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/autograd/__init__.py", line 197, in backward + Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass +RuntimeError: [Rank 29] Caught collective operation timeout: WorkNCCL(SeqNum=9412683, OpType=ALLREDUCE, TensorShape=[1023], Timeout(ms)=1800000) ran for 1804484 milliseconds before timing out. 
+Process SpawnProcess-2: +Process SpawnProcess-3: +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 630, in train_one_epoch + scaler.scale(loss).backward() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/_tensor.py", line 488, in backward + torch.autograd.backward( + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/autograd/__init__.py", line 197, in backward + Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass +RuntimeError: [Rank 13] Caught collective operation timeout: WorkNCCL(SeqNum=9412683, OpType=ALLREDUCE, TensorShape=[1023], Timeout(ms)=1800000) ran for 1804273 milliseconds before timing out. +Process SpawnProcess-3: +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 630, in train_one_epoch + scaler.scale(loss).backward() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/_tensor.py", line 488, in backward + torch.autograd.backward( + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/autograd/__init__.py", line 197, in backward + Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass +RuntimeError: [Rank 50] Caught collective operation timeout: WorkNCCL(SeqNum=9412683, OpType=ALLREDUCE, TensorShape=[1023], Timeout(ms)=1800000) ran for 1804680 milliseconds before timing out. 
+Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 630, in train_one_epoch + scaler.scale(loss).backward() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/_tensor.py", line 488, in backward + torch.autograd.backward( + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/autograd/__init__.py", line 197, in backward + Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass +RuntimeError: [Rank 6] Caught collective operation timeout: WorkNCCL(SeqNum=9412683, OpType=ALLREDUCE, TensorShape=[1023], Timeout(ms)=1800000) ran for 1804607 milliseconds before timing out. +Process SpawnProcess-3: +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 630, in train_one_epoch + scaler.scale(loss).backward() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/_tensor.py", line 488, in backward + torch.autograd.backward( + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/autograd/__init__.py", line 197, in backward + Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass +RuntimeError: [Rank 30] Caught collective operation timeout: WorkNCCL(SeqNum=9412683, OpType=ALLREDUCE, TensorShape=[1023], Timeout(ms)=1800000) ran for 1804538 milliseconds before timing out. 
+Process SpawnProcess-3: +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 630, in train_one_epoch + scaler.scale(loss).backward() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/_tensor.py", line 488, in backward + torch.autograd.backward( + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/autograd/__init__.py", line 197, in backward + Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass +RuntimeError: [Rank 26] Caught collective operation timeout: WorkNCCL(SeqNum=9412683, OpType=ALLREDUCE, TensorShape=[1023], Timeout(ms)=1800000) ran for 1804292 milliseconds before timing out. +gpua010:1622001:1623523 [1] NCCL INFO comm 0x8e6a9490 rank 9 nranks 64 cudaDev 1 busId 46000 - Abort COMPLETE +gpua074:989792:991309 [1] NCCL INFO comm 0x91b8e50 rank 49 nranks 64 cudaDev 1 busId 46000 - Abort COMPLETE +Process SpawnProcess-2: +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 630, in train_one_epoch + scaler.scale(loss).backward() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/_tensor.py", line 488, in backward + torch.autograd.backward( + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/autograd/__init__.py", line 197, in backward + Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass +RuntimeError: [Rank 5] Caught collective operation timeout: WorkNCCL(SeqNum=9412683, OpType=ALLREDUCE, TensorShape=[1023], Timeout(ms)=1800000) ran for 1804559 milliseconds before timing out. 
+Process SpawnProcess-2: +Process SpawnProcess-3: +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 630, in train_one_epoch + scaler.scale(loss).backward() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/_tensor.py", line 488, in backward + torch.autograd.backward( + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/autograd/__init__.py", line 197, in backward + Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass +RuntimeError: [Rank 41] Caught collective operation timeout: WorkNCCL(SeqNum=9412683, OpType=ALLREDUCE, TensorShape=[1023], Timeout(ms)=1800000) ran for 1804434 milliseconds before timing out. +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 630, in train_one_epoch + scaler.scale(loss).backward() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/_tensor.py", line 488, in backward + torch.autograd.backward( + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/autograd/__init__.py", line 197, in backward + Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass +RuntimeError: [Rank 42] Caught collective operation timeout: WorkNCCL(SeqNum=9412683, OpType=ALLREDUCE, TensorShape=[1023], Timeout(ms)=1800000) ran for 1804489 milliseconds before timing out. 
+Process SpawnProcess-3: +Traceback (most recent call last): +Process SpawnProcess-3: + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 630, in train_one_epoch + scaler.scale(loss).backward() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/_tensor.py", line 488, in backward + torch.autograd.backward( + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/autograd/__init__.py", line 197, in backward + Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass +RuntimeError: [Rank 18] Caught collective operation timeout: WorkNCCL(SeqNum=9412683, OpType=ALLREDUCE, TensorShape=[1023], Timeout(ms)=1800000) ran for 1804353 milliseconds before timing out. +Process SpawnProcess-3: +Traceback (most recent call last): +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 630, in train_one_epoch + scaler.scale(loss).backward() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/_tensor.py", line 488, in backward + torch.autograd.backward( + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/autograd/__init__.py", line 197, in backward + Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) +RuntimeError: [Rank 34] Caught collective operation timeout: WorkNCCL(SeqNum=9412683, OpType=ALLREDUCE, TensorShape=[1023], Timeout(ms)=1800000) ran for 1804440 milliseconds before timing out. 
+ File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 630, in train_one_epoch + scaler.scale(loss).backward() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/_tensor.py", line 488, in backward + torch.autograd.backward( + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/autograd/__init__.py", line 197, in backward + Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass +RuntimeError: [Rank 58] Caught collective operation timeout: WorkNCCL(SeqNum=9412683, OpType=ALLREDUCE, TensorShape=[1023], Timeout(ms)=1800000) ran for 1804403 milliseconds before timing out. +Process SpawnProcess-2: +Process SpawnProcess-3: +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 630, in train_one_epoch + scaler.scale(loss).backward() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/_tensor.py", line 488, in backward + torch.autograd.backward( + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/autograd/__init__.py", line 197, in backward + Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass +RuntimeError: [Rank 61] Caught collective operation timeout: WorkNCCL(SeqNum=9412683, OpType=ALLREDUCE, TensorShape=[1023], Timeout(ms)=1800000) ran for 1804595 milliseconds before timing out. 
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 630, in train_one_epoch
+    scaler.scale(loss).backward()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/_tensor.py", line 488, in backward
+    torch.autograd.backward(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/autograd/__init__.py", line 197, in backward
+    Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass
+RuntimeError: [Rank 62] Caught collective operation timeout: WorkNCCL(SeqNum=9412683, OpType=ALLREDUCE, TensorShape=[1023], Timeout(ms)=1800000) ran for 1804654 milliseconds before timing out.
+Process SpawnProcess-3:
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 630, in train_one_epoch
+    scaler.scale(loss).backward()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/_tensor.py", line 488, in backward
+    torch.autograd.backward(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/autograd/__init__.py", line 197, in backward
+    Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass
+RuntimeError: [Rank 10] Caught collective operation timeout: WorkNCCL(SeqNum=9412683, OpType=ALLREDUCE, TensorShape=[1023], Timeout(ms)=1800000) ran for 1804678 milliseconds before timing out.
+Process SpawnProcess-3:
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 630, in train_one_epoch
+    scaler.scale(loss).backward()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/_tensor.py", line 488, in backward
+    torch.autograd.backward(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/autograd/__init__.py", line 197, in backward
+    Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass
+RuntimeError: [Rank 22] Caught collective operation timeout: WorkNCCL(SeqNum=9412683, OpType=ALLREDUCE, TensorShape=[1023], Timeout(ms)=1800000) ran for 1804467 milliseconds before timing out.
+Process SpawnProcess-3:
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 630, in train_one_epoch
+    scaler.scale(loss).backward()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/_tensor.py", line 488, in backward
+    torch.autograd.backward(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/autograd/__init__.py", line 197, in backward
+    Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass
+RuntimeError: [Rank 38] Caught collective operation timeout: WorkNCCL(SeqNum=9412683, OpType=ALLREDUCE, TensorShape=[1023], Timeout(ms)=1800000) ran for 1804274 milliseconds before timing out.
+Process SpawnProcess-3:
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 630, in train_one_epoch
+    scaler.scale(loss).backward()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/_tensor.py", line 488, in backward
+    torch.autograd.backward(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/autograd/__init__.py", line 197, in backward
+    Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass
+RuntimeError: [Rank 54] Caught collective operation timeout: WorkNCCL(SeqNum=9412683, OpType=ALLREDUCE, TensorShape=[1023], Timeout(ms)=1800000) ran for 1804266 milliseconds before timing out.
+Process SpawnProcess-2:
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 630, in train_one_epoch
+    scaler.scale(loss).backward()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/_tensor.py", line 488, in backward
+    torch.autograd.backward(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/autograd/__init__.py", line 197, in backward
+    Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass
+RuntimeError: [Rank 25] Caught collective operation timeout: WorkNCCL(SeqNum=9412683, OpType=ALLREDUCE, TensorShape=[1023], Timeout(ms)=1800000) ran for 1805539 milliseconds before timing out.
+Process SpawnProcess-2:
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 630, in train_one_epoch
+    scaler.scale(loss).backward()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/_tensor.py", line 488, in backward
+    torch.autograd.backward(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/autograd/__init__.py", line 197, in backward
+    Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass
+RuntimeError: [Rank 33] Caught collective operation timeout: WorkNCCL(SeqNum=9412683, OpType=ALLREDUCE, TensorShape=[1023], Timeout(ms)=1800000) ran for 1805578 milliseconds before timing out.
+Process SpawnProcess-2:
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 630, in train_one_epoch
+    scaler.scale(loss).backward()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/_tensor.py", line 488, in backward
+    torch.autograd.backward(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/autograd/__init__.py", line 197, in backward
+    Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass
+RuntimeError: [Rank 57] Caught collective operation timeout: WorkNCCL(SeqNum=9412683, OpType=ALLREDUCE, TensorShape=[1023], Timeout(ms)=1800000) ran for 1805585 milliseconds before timing out.
+Process SpawnProcess-2:
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 630, in train_one_epoch
+    scaler.scale(loss).backward()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/_tensor.py", line 488, in backward
+    torch.autograd.backward(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/autograd/__init__.py", line 197, in backward
+    Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass
+RuntimeError: [Rank 9] Caught collective operation timeout: WorkNCCL(SeqNum=9412683, OpType=ALLREDUCE, TensorShape=[1023], Timeout(ms)=1800000) ran for 1805928 milliseconds before timing out.
+Process SpawnProcess-2:
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 630, in train_one_epoch
+    scaler.scale(loss).backward()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/_tensor.py", line 488, in backward
+    torch.autograd.backward(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/autograd/__init__.py", line 197, in backward
+    Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass
+RuntimeError: [Rank 49] Caught collective operation timeout: WorkNCCL(SeqNum=9412683, OpType=ALLREDUCE, TensorShape=[1023], Timeout(ms)=1800000) ran for 1805925 milliseconds before timing out.
+gpua005:322788:324302 [3] NCCL INFO comm 0xb7586590 rank 7 nranks 64 cudaDev 3 busId c7000 - Abort COMPLETE
+gpua053:959077:960604 [3] NCCL INFO comm 0x8f7ecf20 rank 35 nranks 64 cudaDev 3 busId c7000 - Abort COMPLETE
+Process SpawnProcess-4:
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 630, in train_one_epoch
+    scaler.scale(loss).backward()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/_tensor.py", line 488, in backward
+    torch.autograd.backward(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/autograd/__init__.py", line 197, in backward
+    Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass
+RuntimeError: [Rank 7] Caught collective operation timeout: WorkNCCL(SeqNum=9412683, OpType=ALLREDUCE, TensorShape=[1023], Timeout(ms)=1800000) ran for 1816754 milliseconds before timing out.
+gpua087:2330956:2332486 [3] NCCL INFO comm 0x4fa40250 rank 55 nranks 64 cudaDev 3 busId c7000 - Abort COMPLETE
+gpua025:63839:65363 [3] NCCL INFO comm 0xc1e534d0 rank 15 nranks 64 cudaDev 3 busId c7000 - Abort COMPLETE
+gpua029:1226924:1228445 [3] NCCL INFO comm 0x502a1280 rank 23 nranks 64 cudaDev 3 busId c7000 - Abort COMPLETE
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/runpy.py", line 196, in _run_module_as_main
+    return _run_code(code, main_globals, None,
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/runpy.py", line 86, in _run_code
+    exec(code, run_globals)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py", line 23, in <module>
+    main()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py", line 19, in main
+    S2TTask.main(cmd=cmd)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1104, in main
+    while not ProcessContext(processes, error_queues).join():
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/multiprocessing/spawn.py", line 149, in join
+    raise ProcessExitedException(
+torch.multiprocessing.spawn.ProcessExitedException: process 1 terminated with exit code 1
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/runpy.py", line 196, in _run_module_as_main
+    return _run_code(code, main_globals, None,
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/runpy.py", line 86, in _run_code
+    exec(code, run_globals)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py", line 23, in <module>
+    main()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py", line 19, in main
+    S2TTask.main(cmd=cmd)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1104, in main
+    while not ProcessContext(processes, error_queues).join():
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/multiprocessing/spawn.py", line 149, in join
+    raise ProcessExitedException(
+torch.multiprocessing.spawn.ProcessExitedException: process 2 terminated with exit code 1
+Process SpawnProcess-4:
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 630, in train_one_epoch
+    scaler.scale(loss).backward()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/_tensor.py", line 488, in backward
+    torch.autograd.backward(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/autograd/__init__.py", line 197, in backward
+    Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass
+RuntimeError: [Rank 55] Caught collective operation timeout: WorkNCCL(SeqNum=9412683, OpType=ALLREDUCE, TensorShape=[1023], Timeout(ms)=1800000) ran for 1817525 milliseconds before timing out.
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/runpy.py", line 196, in _run_module_as_main
+    return _run_code(code, main_globals, None,
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/runpy.py", line 86, in _run_code
+    exec(code, run_globals)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py", line 23, in <module>
+    main()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py", line 19, in main
+    S2TTask.main(cmd=cmd)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1104, in main
+    while not ProcessContext(processes, error_queues).join():
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/multiprocessing/spawn.py", line 149, in join
+    raise ProcessExitedException(
+torch.multiprocessing.spawn.ProcessExitedException: process 2 terminated with exit code 1
+Process SpawnProcess-4:
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 630, in train_one_epoch
+    scaler.scale(loss).backward()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/_tensor.py", line 488, in backward
+    torch.autograd.backward(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/autograd/__init__.py", line 197, in backward
+    Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass
+RuntimeError: [Rank 35] Caught collective operation timeout: WorkNCCL(SeqNum=9412683, OpType=ALLREDUCE, TensorShape=[1023], Timeout(ms)=1800000) ran for 1817402 milliseconds before timing out.
+gpua098:2101211:2102741 [3] NCCL INFO comm 0xb9e844a0 rank 63 nranks 64 cudaDev 3 busId c7000 - Abort COMPLETE
+gpua028:3269324:3270856 [3] NCCL INFO comm 0x50758ff0 rank 19 nranks 64 cudaDev 3 busId c7000 - Abort COMPLETE
+Process SpawnProcess-4:
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 630, in train_one_epoch
+    scaler.scale(loss).backward()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/_tensor.py", line 488, in backward
+    torch.autograd.backward(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/autograd/__init__.py", line 197, in backward
+    Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass
+RuntimeError: [Rank 15] Caught collective operation timeout: WorkNCCL(SeqNum=9412683, OpType=ALLREDUCE, TensorShape=[1023], Timeout(ms)=1800000) ran for 1817790 milliseconds before timing out.
+gpua074:989794:991315 [3] NCCL INFO comm 0x51823d90 rank 51 nranks 64 cudaDev 3 busId c7000 - Abort COMPLETE
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/runpy.py", line 196, in _run_module_as_main
+    return _run_code(code, main_globals, None,
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/runpy.py", line 86, in _run_code
+    exec(code, run_globals)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py", line 23, in <module>
+    main()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py", line 19, in main
+    S2TTask.main(cmd=cmd)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1104, in main
+    while not ProcessContext(processes, error_queues).join():
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/multiprocessing/spawn.py", line 149, in join
+    raise ProcessExitedException(
+torch.multiprocessing.spawn.ProcessExitedException: process 1 terminated with exit code 1
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/runpy.py", line 196, in _run_module_as_main
+    return _run_code(code, main_globals, None,
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/runpy.py", line 86, in _run_code
+    exec(code, run_globals)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py", line 23, in <module>
+    main()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py", line 19, in main
+    S2TTask.main(cmd=cmd)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1104, in main
+    while not ProcessContext(processes, error_queues).join():
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/multiprocessing/spawn.py", line 149, in join
+    raise ProcessExitedException(
+torch.multiprocessing.spawn.ProcessExitedException: process 1 terminated with exit code 1
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/runpy.py", line 196, in _run_module_as_main
+    return _run_code(code, main_globals, None,
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/runpy.py", line 86, in _run_code
+    exec(code, run_globals)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py", line 23, in <module>
+    main()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py", line 19, in main
+    S2TTask.main(cmd=cmd)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1104, in main
+    while not ProcessContext(processes, error_queues).join():
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/multiprocessing/spawn.py", line 149, in join
+    raise ProcessExitedException(
+torch.multiprocessing.spawn.ProcessExitedException: process 2 terminated with exit code 1
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/runpy.py", line 196, in _run_module_as_main
+    return _run_code(code, main_globals, None,
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/runpy.py", line 86, in _run_code
+    exec(code, run_globals)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py", line 23, in <module>
+    main()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py", line 19, in main
+    S2TTask.main(cmd=cmd)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1104, in main
+    while not ProcessContext(processes, error_queues).join():
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/multiprocessing/spawn.py", line 149, in join
+    raise ProcessExitedException(
+torch.multiprocessing.spawn.ProcessExitedException: process 1 terminated with exit code 1
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/runpy.py", line 196, in _run_module_as_main
+    return _run_code(code, main_globals, None,
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/runpy.py", line 86, in _run_code
+    exec(code, run_globals)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py", line 23, in <module>
+    main()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py", line 19, in main
+    S2TTask.main(cmd=cmd)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1104, in main
+    while not ProcessContext(processes, error_queues).join():
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/multiprocessing/spawn.py", line 149, in join
+    raise ProcessExitedException(
+torch.multiprocessing.spawn.ProcessExitedException: process 2 terminated with exit code 1
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/runpy.py", line 196, in _run_module_as_main
+    return _run_code(code, main_globals, None,
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/runpy.py", line 86, in _run_code
+    exec(code, run_globals)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py", line 23, in <module>
+    main()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py", line 19, in main
+    S2TTask.main(cmd=cmd)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1104, in main
+    while not ProcessContext(processes, error_queues).join():
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/multiprocessing/spawn.py", line 149, in join
+    raise ProcessExitedException(
+torch.multiprocessing.spawn.ProcessExitedException: process 2 terminated with exit code 1
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/runpy.py", line 196, in _run_module_as_main
+    return _run_code(code, main_globals, None,
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/runpy.py", line 86, in _run_code
+    exec(code, run_globals)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py", line 23, in <module>
+    main()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py", line 19, in main
+    S2TTask.main(cmd=cmd)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1104, in main
+    while not ProcessContext(processes, error_queues).join():
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/multiprocessing/spawn.py", line 149, in join
+    raise ProcessExitedException(
+torch.multiprocessing.spawn.ProcessExitedException: process 2 terminated with exit code 1
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/runpy.py", line 196, in _run_module_as_main
+    return _run_code(code, main_globals, None,
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/runpy.py", line 86, in _run_code
+    exec(code, run_globals)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py", line 23, in <module>
+    main()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py", line 19, in main
+    S2TTask.main(cmd=cmd)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1104, in main
+    while not ProcessContext(processes, error_queues).join():
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/multiprocessing/spawn.py", line 149, in join
+    raise ProcessExitedException(
+torch.multiprocessing.spawn.ProcessExitedException: process 2 terminated with exit code 1
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/runpy.py", line 196, in _run_module_as_main
+    return _run_code(code, main_globals, None,
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/runpy.py", line 86, in _run_code
+    exec(code, run_globals)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py", line 23, in <module>
+    main()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py", line 19, in main
+    S2TTask.main(cmd=cmd)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1104, in main
+    while not ProcessContext(processes, error_queues).join():
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/multiprocessing/spawn.py", line 149, in join
+    raise ProcessExitedException(
+torch.multiprocessing.spawn.ProcessExitedException: process 1 terminated with exit code 1
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/runpy.py", line 196, in _run_module_as_main
+    return _run_code(code, main_globals, None,
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/runpy.py", line 86, in _run_code
+    exec(code, run_globals)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py", line 23, in <module>
+    main()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py", line 19, in main
+    S2TTask.main(cmd=cmd)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1104, in main
+    while not ProcessContext(processes, error_queues).join():
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/multiprocessing/spawn.py", line 149, in join
+    raise ProcessExitedException(
+torch.multiprocessing.spawn.ProcessExitedException: process 1 terminated with exit code 1
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/runpy.py", line 196, in _run_module_as_main
+    return _run_code(code, main_globals, None,
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/runpy.py", line 86, in _run_code
+    exec(code, run_globals)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py", line 23, in <module>
+    main()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py", line 19, in main
+    S2TTask.main(cmd=cmd)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1104, in main
+    while not ProcessContext(processes, error_queues).join():
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/multiprocessing/spawn.py", line 149, in join
+    raise ProcessExitedException(
+torch.multiprocessing.spawn.ProcessExitedException: process 2 terminated with exit code 1
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/runpy.py", line 196, in _run_module_as_main
+    return _run_code(code, main_globals, None,
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/runpy.py", line 86, in _run_code
+    exec(code, run_globals)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py", line 23, in <module>
+    main()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py", line 19, in main
+    S2TTask.main(cmd=cmd)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1104, in main
+    while not ProcessContext(processes, error_queues).join():
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/multiprocessing/spawn.py", line 149, in join
+    raise ProcessExitedException(
+torch.multiprocessing.spawn.ProcessExitedException: process 1 terminated with exit code 1
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/runpy.py", line 196, in _run_module_as_main
+    return _run_code(code, main_globals, None,
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/runpy.py", line 86, in _run_code
+    exec(code, run_globals)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py", line 23, in <module>
+    main()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py", line 19, in main
+    S2TTask.main(cmd=cmd)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1104, in main
+    while not ProcessContext(processes, error_queues).join():
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/multiprocessing/spawn.py", line 149, in join
+    raise ProcessExitedException(
+torch.multiprocessing.spawn.ProcessExitedException: process 1 terminated with exit code 1
+srun: error: gpua005: task 1: Exited with exit code 1
+srun: error: gpua029: task 5: Exited with exit code 1
+srun: error: gpua025: task 3: Exited with exit code 1
+srun: error: gpua060: task 11: Exited with exit code 1
+srun: error: gpua035: task 7: Exited with exit code 1
+srun: error: gpua003: task 0: Exited with exit code 1
+srun: error: gpua055: task 9: Exited with exit code 1
+srun: error: gpua010: task 2: Exited with exit code 1
+srun: error: gpua087: task 13: Exited with exit code 1
+srun: error: gpua057: task 10: Exited with exit code 1
+srun: error: gpua031: task 6: Exited with exit code 1
+srun: error: gpua090: task 14: Exited with exit code 1
+srun: error: gpua053: task 8: Exited with exit code 1
+srun: error: gpua028: task 4: Exited with exit code 1
+srun: error: gpua098: task 15: Exited with exit code 1
+srun: error: gpua074: task 12: Exited with exit code 1
+# Accounting: begin_time=1688614643
+# Accounting: end_time=1688748923
+# Accounting: time=134280 threads=1
+# Finished at Fri Jul 7 11:55:23 CDT 2023 with status 1