diff --git "a/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/train.2.log" "b/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/train.2.log" new file mode 100644--- /dev/null +++ "b/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/train.2.log" @@ -0,0 +1,4663 @@ +# Running on gpub002.delta.ncsa.illinois.edu +# Started at Wed Jul 12 13:15:16 CDT 2023 +# SLURMD_NODENAME=gpub002 +# SLURM_CLUSTER_NAME=delta +# SLURM_CONF=/var/spool/slurmd/conf-cache/slurm.conf +# SLURM_CPUS_ON_NODE=64 +# SLURM_CPUS_PER_TASK=64 +# SLURM_EXPORT_ENV=PATH +# SLURM_GET_USER_ENV=1 +# SLURM_GPUS_ON_NODE=4 +# SLURM_GTIDS=0 +# SLURM_JOBID=2147805 +# SLURM_JOB_ACCOUNT=bbjs-delta-gpu +# SLURM_JOB_CPUS_PER_NODE='64(x16)' +# SLURM_JOB_GID=202 +# SLURM_JOB_GPUS=0,1,2,3 +# SLURM_JOB_ID=2147805 +# SLURM_JOB_NAME=exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/train.log +# SLURM_JOB_NODELIST='gpub[002,008,010-011,019,027-028,030,050-053,073-074,078,084]' +# SLURM_JOB_NUM_NODES=16 +# SLURM_JOB_PARTITION=gpuA40x4 +# SLURM_JOB_QOS=bbjs-delta-gpu +# SLURM_JOB_UID=68077 +# SLURM_JOB_USER=peng6 +# SLURM_LOCALID=0 +# SLURM_MEM_PER_NODE=240000 +# SLURM_NNODES=16 +# SLURM_NODEID=0 +# SLURM_NODELIST='gpub[002,008,010-011,019,027-028,030,050-053,073-074,078,084]' +# SLURM_NODE_ALIASES='(null)' +# SLURM_OPEN_MODE=a +# SLURM_PRIO_PROCESS=0 +# SLURM_PROCID=0 +# SLURM_SUBMIT_DIR=/scratch/bbjs/peng6/espnet-whisper-public/egs2/mixed_v3/s2t1 +# SLURM_SUBMIT_HOST=dt-login02.delta.internal.ncsa.edu +# SLURM_TASKS_PER_NODE='1(x16)' +# SLURM_TASK_PID=2108111 +# SLURM_TOPOLOGY_ADDR=ss00.ss09.gpub002 +# SLURM_TOPOLOGY_ADDR_PATTERN=switch.switch.node +# SLURM_WORKING_CLUSTER=delta:dt-sched:6817:9728:109 +# srun --export=ALL python3 -m espnet2.bin.s2t_train --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000 --config conf/train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/speech_shape --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type 
+/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/bin/python3 /scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000 --config conf/train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/speech_shape --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-whisper-public/egs2/mixed_v3/s2t1/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/.dist_init_dea1b7df-6390-451b-bb9e-0e3133584ca1
+[gpub002:0/64] 2023-07-12 13:18:48,677 (distributed_c10d:319) INFO: Added key: store_based_barrier_key:1 to store for rank: 0
+[gpub002:0/64] 2023-07-12 13:18:49,830 (distributed_c10d:353) INFO: Rank 0: Completed store-based barrier for key:store_based_barrier_key:1 with 64 nodes.
+[gpub002:0/64] 2023-07-12 13:18:49,864 (s2t:483) INFO: Vocabulary size: 50002
+[gpub002:0/64] 2023-07-12 13:19:04,645 (abs_task:1201) INFO: pytorch.version=1.13.1, cuda.available=True, cudnn.version=8500, cudnn.benchmark=False, cudnn.deterministic=True
+[gpub002:0/64] 2023-07-12 13:19:04,653 (abs_task:1202) INFO: Model structure:
+ESPnetS2TModel(
+  (frontend): DefaultFrontend(
+    (stft): Stft(n_fft=512, win_length=400, hop_length=160, center=True, normalized=False, onesided=True)
+    (frontend): Frontend()
+    (logmel): LogMel(sr=16000, n_fft=512, n_mels=80, fmin=0, fmax=8000.0, htk=False)
+  )
+  (specaug): SpecAug(
+    (freq_mask): MaskAlongAxis(mask_width_range=[0, 27], num_mask=2, axis=freq)
+    (time_mask): MaskAlongAxisVariableMaxWidth(mask_width_ratio_range=[0.0, 0.05], num_mask=10, axis=time)
+  )
+  (normalize): GlobalMVN(stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz, norm_means=True, norm_vars=True)
+  (encoder): TransformerEncoder(
+    (embed): Conv2dSubsampling(
+      (conv): Sequential(
+        (0): Conv2d(1, 1024, kernel_size=(3, 3), stride=(2, 2))
+        (1): ReLU()
+        (2): Conv2d(1024, 1024, kernel_size=(3, 3), stride=(2, 2))
+        (3): ReLU()
+      )
+      (out): Sequential(
+        (0): Linear(in_features=19456, out_features=1024, bias=True)
+        (1): PositionalEncoding(
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+      )
+    )
+    (encoders): MultiSequential(
+      (0): EncoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (1): EncoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (2): EncoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (3): EncoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (4): EncoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (5): EncoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (6): EncoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (7): EncoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (8): EncoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (9): EncoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (10): EncoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (11): EncoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (12): EncoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (13): EncoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (14): EncoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (15): EncoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (16): EncoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (17): EncoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (18): EncoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (19): EncoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (20): EncoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (21): EncoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (22): EncoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (23): EncoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+    )
+    (after_norm): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+  )
+  (decoder): TransformerDecoder(
+    (embed): Sequential(
+      (0): Embedding(50002, 1024)
+      (1): PositionalEncoding(
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+    )
+    (after_norm): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+    (output_layer): Linear(in_features=1024, out_features=50002, bias=True)
+    (decoders): MultiSequential(
+      (0): DecoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (src_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (1): DecoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (src_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (2): DecoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (src_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (3): DecoderLayer(
+        (self_attn): MultiHeadedAttention(
(linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (4): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (5): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), 
eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (6): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (7): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (8): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): 
LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (9): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (10): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (11): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + 
(w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (12): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (13): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (14): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): 
Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (15): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (16): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (17): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): 
Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (18): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (19): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (20): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): 
Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (21): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (22): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (23): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): 
Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (src_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+    )
+  )
+  (criterion_att): LabelSmoothingLoss(
+    (criterion): KLDivLoss()
+  )
+  (ctc): CTC(
+    (ctc_lo): Linear(in_features=1024, out_features=50002, bias=True)
+    (ctc_loss): CTCLoss()
+  )
+)
+
+Model summary:
+    Class Name: ESPnetS2TModel
+    Total Number of model parameters: 888.51 M
+    Number of trainable parameters: 888.51 M (100.0%)
+    Size: 3.55 GB
+    Type: torch.float32
+[gpub002:0/64] 2023-07-12 13:19:04,653 (abs_task:1205) INFO: Optimizer:
+AdamW (
+Parameter Group 0
+    amsgrad: False
+    betas: [0.9, 0.98]
+    capturable: False
+    eps: 1e-06
+    foreach: None
+    initial_lr: 0.00025
+    lr: 2.5e-08
+    maximize: False
+    weight_decay: 0.0
+)
+[gpub002:0/64] 2023-07-12 13:19:04,653 (abs_task:1206) INFO: Scheduler: WarmupLR(warmup_steps=10000)
+[gpub002:0/64] 2023-07-12 13:19:04,667 (abs_task:1215) INFO: Saving the configuration in exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/config.yaml
+[gpub002:0/64] 2023-07-12 13:19:05,366 (abs_task:1272) INFO: Loading pretrained params from /scratch/bbjs/peng6/espnet-whisper-public/egs2/mixed_v2/s2t1/exp/s2t_train_s2t_transformer_conv2d_size1024_e18_d18_lr5e-4_warmup20k_raw_bpe50000/valid.acc.ave.pth
+[gpub002:0/64] 2023-07-12 13:19:13,983 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub002:0/64] 2023-07-12 13:19:14,187 (abs_task:1570) INFO: [valid] dataset:
+ESPnetDataset(
+  speech: {"path": "dump/raw/dev/wav.scp", "type": "kaldi_ark"}
+  text_prev: {"path": "dump/raw/dev/text.prev", "type": "text"}
+  text_ctc: {"path": "dump/raw/dev/text.ctc", "type": "text"}
+  text: {"path": "dump/raw/dev/text", "type": "text"}
+  preprocess: )
+[gpub002:0/64] 2023-07-12 13:19:14,187 (abs_task:1571) INFO: [valid] Batch sampler: UnsortedBatchSampler(N-batch=1012, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/valid/speech_shape,
+[gpub002:0/64] 2023-07-12 13:19:14,194 (abs_task:1572) INFO: [valid] mini-batch sizes summary: N-batch=1012, mean=128.1, min=128, max=129
+[gpub002:0/64] 2023-07-12 13:19:14,680 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub002:0/64] 2023-07-12 13:19:15,003 (abs_task:1570) INFO: [plot_att] dataset:
+ESPnetDataset(
+  speech: {"path": "dump/raw/dev/wav.scp", "type": "kaldi_ark"}
+  text_prev: {"path": "dump/raw/dev/text.prev", "type": "text"}
+  text_ctc: {"path": "dump/raw/dev/text.ctc", "type": "text"}
+  text: {"path": "dump/raw/dev/text", "type": "text"}
+  preprocess: )
+[gpub002:0/64] 2023-07-12 13:19:15,003 (abs_task:1571) INFO: [plot_att] Batch sampler: UnsortedBatchSampler(N-batch=129591, batch_size=1, key_file=exp/s2t_stats_raw_bpe50000/valid/speech_shape,
+[gpub002:0/64] 2023-07-12 13:19:15,003 (abs_task:1572) INFO: [plot_att] mini-batch sizes summary: N-batch=3, mean=1.0, min=1, max=1
+[gpub002:0/64] 2023-07-12 13:19:42,133 (trainer:159) INFO: The training was resumed using exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/checkpoint.pth
+gpub002:2108199:2108199 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.102<0>
+gpub002:2108199:2108199 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub002:2108199:2108199 [0] NCCL INFO cudaDriverVersion 12010
+NCCL version 2.14.3+cuda11.7
+[gpub002:0/64] 2023-07-12 13:19:47,191 (trainer:284) INFO: 40/50epoch started
+[gpub002:0/64] 2023-07-12 13:19:47,237 (multiple_iter_factory:32) INFO: Building 0th iter-factory...
+[gpub002:0/64] 2023-07-12 13:20:04,995 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub002:0/64] 2023-07-12 13:20:08,308 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"}
+  preprocess: )
+[gpub002:0/64] 2023-07-12 13:20:08,308 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4,
+[gpub002:0/64] 2023-07-12 13:20:08,314 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+gpub011:1718215:1718215 [0] NCCL INFO cudaDriverVersion 12010
+gpub011:1718215:1718215 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.111<0>
+gpub011:1718215:1718215 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub011:1718215:1718290 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.111<0>
+gpub011:1718215:1718290 [0] NCCL INFO Using network IB
+gpub011:1718215:1718290 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000
+gpub011:1718215:1718290 [0] NCCL INFO Trees [0] 13/-1/-1->12->8 [1] 13/4/-1->12->28
+gpub011:1718215:1718290 [0] NCCL INFO Channel 00/0 : 11[c7000] -> 12[7000] [receive] via NET/IB/0
+gpub011:1718215:1718290 [0] NCCL INFO Channel 01/0 : 11[c7000] -> 12[7000] [receive] via NET/IB/0
+gpub011:1718215:1718290 [0] NCCL INFO Channel 00/0 : 12[7000] -> 13[46000] via P2P/IPC
+gpub011:1718215:1718290 [0] NCCL INFO Channel 01/0 : 12[7000] -> 13[46000] via P2P/IPC
+gpub011:1718215:1718290 [0] NCCL INFO Connected all rings
+gpub011:1718215:1718290 [0] NCCL INFO Channel 00/0 : 8[7000] -> 12[7000] [receive] via NET/IB/0
+gpub011:1718215:1718290 [0] NCCL INFO Channel 01/0 : 4[7000] -> 12[7000] [receive] via NET/IB/0
+gpub011:1718215:1718290 [0] NCCL INFO Channel 01/0 : 12[7000] -> 28[7000] [send] via NET/IB/0
+gpub011:1718215:1718290 [0] NCCL INFO Channel 01/0 : 28[7000] -> 12[7000] [receive] via NET/IB/0
+gpub011:1718215:1718290 [0] NCCL INFO Channel 01/0 : 12[7000] -> 4[7000] [send] via NET/IB/0
+gpub011:1718215:1718290 [0] NCCL INFO Channel 00/0 : 12[7000] -> 8[7000] [send] via NET/IB/0
+gpub011:1718215:1718290 [0] NCCL INFO Connected all trees
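The repeated EncoderLayer/DecoderLayer blocks dumped above are standard Transformer blocks. A minimal PyTorch sketch of one encoder block, assuming the dimensions shown in the dump (d_model=1024, d_ff=4096, dropout 0.1, LayerNorm eps 1e-12); the head count and the pre-LayerNorm residual order are assumptions not visible in the repr, and nn.MultiheadAttention stands in for ESPnet's MultiHeadedAttention (same parameter count, fused q/k/v projections):

    import torch
    import torch.nn as nn

    class EncoderLayerSketch(nn.Module):
        # One of the 24 encoder blocks above: self-attention followed by a
        # position-wise feed-forward net, each with LayerNorm, dropout, and
        # a residual connection.
        def __init__(self, d_model=1024, d_ff=4096, n_head=16, p=0.1):
            super().__init__()
            self.self_attn = nn.MultiheadAttention(d_model, n_head,
                                                   dropout=p, batch_first=True)
            self.feed_forward = nn.Sequential(
                nn.Linear(d_model, d_ff),   # (w_1) in the dump
                nn.ReLU(),                  # (activation)
                nn.Dropout(p),              # (dropout)
                nn.Linear(d_ff, d_model),   # (w_2)
            )
            self.norm1 = nn.LayerNorm(d_model, eps=1e-12)
            self.norm2 = nn.LayerNorm(d_model, eps=1e-12)
            self.dropout = nn.Dropout(p)

        def forward(self, x, pad_mask=None):
            y = self.norm1(x)
            y, _ = self.self_attn(y, y, y, key_padding_mask=pad_mask)
            x = x + self.dropout(y)
            y = self.feed_forward(self.norm2(x))
            return x + self.dropout(y)

The 24 DecoderLayer blocks differ only by an extra src_attn cross-attention over the encoder output and a third LayerNorm (norm3).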
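The 888.51 M parameter figure and 3.55 GB size in the model summary can be sanity-checked from the dumped shapes alone. A back-of-the-envelope in Python; the ~29 M not accounted for below presumably sits in the Conv2d frontend and the few small modules that appear earlier in the dump rather than in this excerpt:

    d, ff, V = 1024, 4096, 50002

    lin = lambda i, o: i * o + o              # Linear with bias
    attn = 4 * lin(d, d)                      # linear_q/k/v/out
    ffn = lin(d, ff) + lin(ff, d)             # w_1 + w_2
    norm = 2 * d                              # LayerNorm weight + bias

    enc_layer = attn + ffn + 2 * norm         # self_attn, feed_forward, norm1/2
    dec_layer = 2 * attn + ffn + 3 * norm     # adds src_attn and norm3

    total = (24 * enc_layer + 24 * dec_layer  # the e24/d24 stacks
             + V * d                          # decoder Embedding(50002, 1024)
             + lin(d, V)                      # decoder output_layer
             + lin(d, V))                     # ctc_lo
    print(f"{total / 1e6:.2f} M")             # ~859.14 M; + ~29 M frontend => 888.51 M
    print(f"{888.51e6 * 4 / 1e9:.2f} GB")     # float32, 4 bytes/param => 3.55 GB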
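The optimizer dump reports lr: 2.5e-08 even though initial_lr is 0.00025; that is expected at step 1 of WarmupLR(warmup_steps=10000). A short sketch of the schedule, following ESPnet's documented WarmupLR formula (Noam-style: linear warmup to initial_lr at warmup_steps, then inverse-square-root decay):

    def warmup_lr(step, base_lr=2.5e-4, warmup=10000):
        # lr = base_lr * warmup^0.5 * min(step^-0.5, step * warmup^-1.5)
        return base_lr * warmup ** 0.5 * min(step ** -0.5, step * warmup ** -1.5)

    print(warmup_lr(1))        # 2.5e-08 -- matches the "lr" field in the dump above
    print(warmup_lr(10000))    # 2.5e-04 -- peak equals initial_lr at warmup_steps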
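The [valid] and [plot_att] sampler lines are mutually consistent: the same key file yields 129,591 dev utterances, packed into 1,012 batches at batch_size=128 and into single-utterance batches at batch_size=1 (of which, per the summary line, only 3 are actually used for attention plotting). A quick check, assuming the remainder utterances are folded into existing batches, consistent with min=128, max=129:

    n_utts = 129591                 # N-batch at batch_size=1 in the [plot_att] line
    bs = 128
    n_batches = n_utts // bs        # 1012 -- matches the [valid] line
    print(n_batches, round(n_utts / n_batches, 1))   # 1012 128.1 -- the logged mean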
+gpub011:1718215:1718290 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub011:1718215:1718290 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub011:1718215:1718290 [0] NCCL INFO comm 0x8e227720 rank 12 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpub011:1718216:1718216 [1] NCCL INFO cudaDriverVersion 12010 +gpub011:1718216:1718216 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.111<0> +gpub011:1718216:1718216 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub011:1718216:1718288 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.111<0> +gpub011:1718216:1718288 [1] NCCL INFO Using network IB +gpub011:1718216:1718288 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub011:1718216:1718288 [1] NCCL INFO Trees [0] 14/-1/-1->13->12 [1] 14/20/-1->13->12 +gpub011:1718216:1718288 [1] NCCL INFO Channel 00/0 : 13[46000] -> 14[85000] via P2P/IPC +gpub011:1718216:1718288 [1] NCCL INFO Channel 01/0 : 13[46000] -> 14[85000] via P2P/IPC +gpub011:1718216:1718288 [1] NCCL INFO Connected all rings +gpub011:1718216:1718288 [1] NCCL INFO Channel 01/0 : 13[46000] -> 20[7000] [send] via NET/IB/0 +gpub011:1718216:1718288 [1] NCCL INFO Channel 01/0 : 20[7000] -> 13[46000] [receive] via NET/IB/0 +gpub011:1718216:1718288 [1] NCCL INFO Channel 00/0 : 13[46000] -> 12[7000] via P2P/IPC +gpub011:1718216:1718288 [1] NCCL INFO Channel 01/0 : 13[46000] -> 12[7000] via P2P/IPC +gpub011:1718216:1718288 [1] NCCL INFO Connected all trees +gpub011:1718216:1718288 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub011:1718216:1718288 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub011:1718216:1718288 [1] NCCL INFO comm 0x9d351fa0 rank 13 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpub011:1718218:1718218 [3] NCCL INFO cudaDriverVersion 12010 +gpub011:1718218:1718218 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.111<0> +gpub011:1718218:1718218 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub011:1718218:1718289 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.111<0> +gpub011:1718218:1718289 [3] NCCL INFO Using network IB +gpub011:1718218:1718289 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub011:1718218:1718289 [3] NCCL INFO Trees [0] -1/-1/-1->15->14 [1] -1/-1/-1->15->14 +gpub011:1718218:1718289 [3] NCCL INFO Channel 00/0 : 15[c7000] -> 16[7000] [send] via NET/IB/0 +gpub011:1718218:1718289 [3] NCCL INFO Channel 01/0 : 15[c7000] -> 16[7000] [send] via NET/IB/0 +gpub011:1718218:1718289 [3] NCCL INFO Connected all rings +gpub011:1718218:1718289 [3] NCCL INFO Channel 00/0 : 15[c7000] -> 14[85000] via P2P/IPC +gpub011:1718218:1718289 [3] NCCL INFO Channel 01/0 : 15[c7000] -> 14[85000] via P2P/IPC +gpub011:1718218:1718289 [3] NCCL INFO Connected all trees +gpub011:1718218:1718289 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub011:1718218:1718289 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub011:1718218:1718289 [3] NCCL INFO comm 0x4fae7090 rank 15 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpub084:95632:95632 [3] NCCL INFO cudaDriverVersion 12010 +gpub084:95632:95632 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.184<0> +gpub084:95632:95632 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub084:95632:95714 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.184<0> 
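The Trees lines above encode NCCL's double binary tree: each "[i] c0/c1/c2->rank->parent" block lists up to three children (-1 meaning none), the local rank, and its parent in tree i. A tiny parser for that line format, inferred from these logs (NCCL does not document the format as stable):

    import re

    line = "NCCL INFO Trees [0] 13/-1/-1->12->8 [1] 13/4/-1->12->28"

    for tree, kids, rank, parent in re.findall(
            r"\[(\d)\] ([-\d/]+)->(\d+)->(-?\d+)", line):
        children = [int(c) for c in kids.split("/") if int(c) >= 0]
        print(f"tree {tree}: rank {rank} -> parent {parent}, children {children}")
    # tree 0: rank 12 -> parent 8,  children [13]
    # tree 1: rank 12 -> parent 28, children [13, 4]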
+gpub084:95632:95714 [3] NCCL INFO Using network IB +gpub084:95632:95714 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub084:95632:95714 [3] NCCL INFO Trees [0] -1/-1/-1->63->62 [1] -1/-1/-1->63->62 +gpub084:95632:95714 [3] NCCL INFO Channel 00/0 : 63[c7000] -> 0[7000] [send] via NET/IB/0 +gpub084:95632:95714 [3] NCCL INFO Channel 01/0 : 63[c7000] -> 0[7000] [send] via NET/IB/0 +gpub084:95632:95714 [3] NCCL INFO Connected all rings +gpub084:95632:95714 [3] NCCL INFO Channel 00/0 : 63[c7000] -> 62[85000] via P2P/IPC +gpub084:95632:95714 [3] NCCL INFO Channel 01/0 : 63[c7000] -> 62[85000] via P2P/IPC +gpub084:95632:95714 [3] NCCL INFO Connected all trees +gpub084:95632:95714 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub084:95632:95714 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub084:95632:95714 [3] NCCL INFO comm 0x9d28050 rank 63 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpub011:1718217:1718217 [2] NCCL INFO cudaDriverVersion 12010 +gpub011:1718217:1718217 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.111<0> +gpub011:1718217:1718217 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub011:1718217:1718291 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.111<0> +gpub011:1718217:1718291 [2] NCCL INFO Using network IB +gpub011:1718217:1718291 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub011:1718217:1718291 [2] NCCL INFO Trees [0] 15/-1/-1->14->13 [1] 15/-1/-1->14->13 +gpub011:1718217:1718291 [2] NCCL INFO Channel 00/0 : 14[85000] -> 15[c7000] via P2P/IPC +gpub011:1718217:1718291 [2] NCCL INFO Channel 01/0 : 14[85000] -> 15[c7000] via P2P/IPC +gpub011:1718217:1718291 [2] NCCL INFO Connected all rings +gpub011:1718217:1718291 [2] NCCL INFO Channel 00/0 : 14[85000] -> 13[46000] via P2P/IPC +gpub011:1718217:1718291 [2] NCCL INFO Channel 01/0 : 14[85000] -> 13[46000] via P2P/IPC +gpub011:1718217:1718291 [2] NCCL INFO Connected all trees +gpub011:1718217:1718291 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub011:1718217:1718291 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub011:1718217:1718291 [2] NCCL INFO comm 0x50a009a0 rank 14 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpub019:2611991:2611991 [2] NCCL INFO cudaDriverVersion 12010 +gpub019:2611991:2611991 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.119<0> +gpub019:2611991:2611991 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub019:2611991:2612065 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.119<0> +gpub019:2611991:2612065 [2] NCCL INFO Using network IB +gpub019:2611991:2612065 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub019:2611991:2612065 [2] NCCL INFO Trees [0] 19/-1/-1->18->17 [1] 19/-1/-1->18->17 +gpub019:2611991:2612065 [2] NCCL INFO Channel 00/0 : 18[85000] -> 19[c7000] via P2P/IPC +gpub019:2611991:2612065 [2] NCCL INFO Channel 01/0 : 18[85000] -> 19[c7000] via P2P/IPC +gpub019:2611991:2612065 [2] NCCL INFO Connected all rings +gpub019:2611991:2612065 [2] NCCL INFO Channel 00/0 : 18[85000] -> 17[46000] via P2P/IPC +gpub019:2611991:2612065 [2] NCCL INFO Channel 01/0 : 18[85000] -> 17[46000] via P2P/IPC +gpub019:2611991:2612065 [2] NCCL INFO Connected all trees +gpub019:2611991:2612065 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub019:2611991:2612065 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer 
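Because the per-rank NCCL init blocks from the 16 nodes arrive interleaved, the easiest audit is mechanical: scan for the "Init COMPLETE" comm lines and confirm all 64 ranks appear. A small sketch (the train.2.log filename refers to this log file; the regex matches the comm lines shown above):

    import re

    # Target lines look like:
    #   gpub011:... [0] NCCL INFO comm 0x8e227720 rank 12 nranks 64 ... Init COMPLETE
    pat = re.compile(r"rank (\d+) nranks (\d+).*Init COMPLETE")

    ranks, nranks = set(), 0
    with open("train.2.log") as f:
        for line in f:
            if m := pat.search(line):
                ranks.add(int(m.group(1)))
                nranks = int(m.group(2))

    missing = set(range(nranks)) - ranks
    print(f"{len(ranks)}/{nranks} ranks initialized; missing: {sorted(missing)}")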
+gpub019:2611991:2612065 [2] NCCL INFO comm 0x10048ab0 rank 18 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpub084:95631:95631 [2] NCCL INFO cudaDriverVersion 12010 +gpub084:95631:95631 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.184<0> +gpub084:95631:95631 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub084:95631:95712 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.184<0> +gpub084:95631:95712 [2] NCCL INFO Using network IB +gpub084:95631:95712 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub084:95631:95712 [2] NCCL INFO Trees [0] 63/-1/-1->62->61 [1] 63/-1/-1->62->61 +gpub084:95631:95712 [2] NCCL INFO Channel 00/0 : 62[85000] -> 63[c7000] via P2P/IPC +gpub084:95631:95712 [2] NCCL INFO Channel 01/0 : 62[85000] -> 63[c7000] via P2P/IPC +gpub084:95631:95712 [2] NCCL INFO Connected all rings +gpub084:95631:95712 [2] NCCL INFO Channel 00/0 : 62[85000] -> 61[46000] via P2P/IPC +gpub084:95631:95712 [2] NCCL INFO Channel 01/0 : 62[85000] -> 61[46000] via P2P/IPC +gpub084:95631:95712 [2] NCCL INFO Connected all trees +gpub084:95631:95712 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub084:95631:95712 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub084:95631:95712 [2] NCCL INFO comm 0x940c750 rank 62 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpub074:3855653:3855653 [1] NCCL INFO cudaDriverVersion 12010 +gpub074:3855653:3855653 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.174<0> +gpub074:3855653:3855653 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub074:3855653:3855727 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.174<0> +gpub074:3855653:3855727 [1] NCCL INFO Using network IB +gpub074:3855653:3855727 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub074:3855653:3855727 [1] NCCL INFO Trees [0] 54/-1/-1->53->52 [1] 54/56/-1->53->52 +gpub074:3855653:3855727 [1] NCCL INFO Channel 00/0 : 53[46000] -> 54[85000] via P2P/IPC +gpub074:3855653:3855727 [1] NCCL INFO Channel 01/0 : 53[46000] -> 54[85000] via P2P/IPC +gpub074:3855653:3855727 [1] NCCL INFO Connected all rings +gpub074:3855653:3855727 [1] NCCL INFO Channel 01/0 : 53[46000] -> 56[7000] [send] via NET/IB/0 +gpub074:3855653:3855727 [1] NCCL INFO Channel 01/0 : 56[7000] -> 53[46000] [receive] via NET/IB/0 +gpub074:3855653:3855727 [1] NCCL INFO Channel 00/0 : 53[46000] -> 52[7000] via P2P/IPC +gpub074:3855653:3855727 [1] NCCL INFO Channel 01/0 : 53[46000] -> 52[7000] via P2P/IPC +gpub074:3855653:3855727 [1] NCCL INFO Connected all trees +gpub074:3855653:3855727 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub074:3855653:3855727 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub074:3855653:3855727 [1] NCCL INFO comm 0xaa1acf00 rank 53 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpub074:3855655:3855655 [3] NCCL INFO cudaDriverVersion 12010 +gpub074:3855655:3855655 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.174<0> +gpub074:3855655:3855655 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub074:3855655:3855725 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.174<0> +gpub074:3855655:3855725 [3] NCCL INFO Using network IB +gpub074:3855655:3855725 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub074:3855655:3855725 [3] NCCL INFO Trees [0] -1/-1/-1->55->54 [1] -1/-1/-1->55->54 +gpub074:3855655:3855725 [3] NCCL 
INFO Channel 00/0 : 55[c7000] -> 56[7000] [send] via NET/IB/0 +gpub074:3855655:3855725 [3] NCCL INFO Channel 01/0 : 55[c7000] -> 56[7000] [send] via NET/IB/0 +gpub074:3855655:3855725 [3] NCCL INFO Connected all rings +gpub074:3855655:3855725 [3] NCCL INFO Channel 00/0 : 55[c7000] -> 54[85000] via P2P/IPC +gpub074:3855655:3855725 [3] NCCL INFO Channel 01/0 : 55[c7000] -> 54[85000] via P2P/IPC +gpub074:3855655:3855725 [3] NCCL INFO Connected all trees +gpub074:3855655:3855725 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub074:3855655:3855725 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub074:3855655:3855725 [3] NCCL INFO comm 0x509a28d0 rank 55 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpub074:3855652:3855652 [0] NCCL INFO cudaDriverVersion 12010 +gpub074:3855652:3855652 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.174<0> +gpub074:3855652:3855652 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub074:3855652:3855726 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.174<0> +gpub074:3855652:3855726 [0] NCCL INFO Using network IB +gpub074:3855652:3855726 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub074:3855652:3855726 [0] NCCL INFO Trees [0] 53/-1/-1->52->57 [1] 53/48/-1->52->45 +gpub074:3855652:3855726 [0] NCCL INFO Channel 00/0 : 51[c7000] -> 52[7000] [receive] via NET/IB/0 +gpub074:3855652:3855726 [0] NCCL INFO Channel 01/0 : 51[c7000] -> 52[7000] [receive] via NET/IB/0 +gpub074:3855652:3855726 [0] NCCL INFO Channel 00/0 : 52[7000] -> 53[46000] via P2P/IPC +gpub074:3855652:3855726 [0] NCCL INFO Channel 01/0 : 52[7000] -> 53[46000] via P2P/IPC +gpub074:3855652:3855726 [0] NCCL INFO Connected all rings +gpub074:3855652:3855726 [0] NCCL INFO Channel 01/0 : 48[7000] -> 52[7000] [receive] via NET/IB/0 +gpub074:3855652:3855726 [0] NCCL INFO Channel 00/0 : 52[7000] -> 57[46000] [send] via NET/IB/0 +gpub074:3855652:3855726 [0] NCCL INFO Channel 01/0 : 45[46000] -> 52[7000] [receive] via NET/IB/0 +gpub074:3855652:3855726 [0] NCCL INFO Channel 01/0 : 52[7000] -> 45[46000] [send] via NET/IB/0 +gpub074:3855652:3855726 [0] NCCL INFO Channel 00/0 : 57[46000] -> 52[7000] [receive] via NET/IB/0 +gpub074:3855652:3855726 [0] NCCL INFO Channel 01/0 : 52[7000] -> 48[7000] [send] via NET/IB/0 +gpub074:3855652:3855726 [0] NCCL INFO Connected all trees +gpub074:3855652:3855726 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub074:3855652:3855726 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub074:3855652:3855726 [0] NCCL INFO comm 0x8e164a10 rank 52 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpub019:2611989:2611989 [0] NCCL INFO cudaDriverVersion 12010 +gpub019:2611989:2611989 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.119<0> +gpub019:2611989:2611989 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub019:2611989:2612066 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.119<0> +gpub019:2611989:2612066 [0] NCCL INFO Using network IB +gpub019:2611989:2612066 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub019:2611989:2612066 [0] NCCL INFO Trees [0] 17/24/-1->16->33 [1] 17/-1/-1->16->20 +gpub019:2611989:2612066 [0] NCCL INFO Channel 00/0 : 15[c7000] -> 16[7000] [receive] via NET/IB/0 +gpub019:2611989:2612066 [0] NCCL INFO Channel 01/0 : 15[c7000] -> 16[7000] [receive] via NET/IB/0 +gpub019:2611989:2612066 [0] NCCL INFO Channel 00/0 : 
16[7000] -> 17[46000] via P2P/IPC +gpub019:2611989:2612066 [0] NCCL INFO Channel 01/0 : 16[7000] -> 17[46000] via P2P/IPC +gpub019:2611989:2612066 [0] NCCL INFO Connected all rings +gpub019:2611989:2612066 [0] NCCL INFO Channel 01/0 : 16[7000] -> 20[7000] [send] via NET/IB/0 +gpub019:2611989:2612066 [0] NCCL INFO Channel 00/0 : 16[7000] -> 24[7000] [send] via NET/IB/0 +gpub019:2611989:2612066 [0] NCCL INFO Channel 00/0 : 16[7000] -> 33[46000] [send] via NET/IB/0 +gpub019:2611989:2612066 [0] NCCL INFO Channel 00/0 : 33[46000] -> 16[7000] [receive] via NET/IB/0 +gpub019:2611989:2612066 [0] NCCL INFO Channel 00/0 : 24[7000] -> 16[7000] [receive] via NET/IB/0 +gpub019:2611989:2612066 [0] NCCL INFO Channel 01/0 : 20[7000] -> 16[7000] [receive] via NET/IB/0 +gpub019:2611989:2612066 [0] NCCL INFO Connected all trees +gpub019:2611989:2612066 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub019:2611989:2612066 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub019:2611989:2612066 [0] NCCL INFO comm 0xa8ee89f0 rank 16 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpub019:2611992:2611992 [3] NCCL INFO cudaDriverVersion 12010 +gpub019:2611992:2611992 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.119<0> +gpub019:2611992:2611992 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub019:2611992:2612064 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.119<0> +gpub019:2611992:2612064 [3] NCCL INFO Using network IB +gpub019:2611992:2612064 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub019:2611992:2612064 [3] NCCL INFO Trees [0] -1/-1/-1->19->18 [1] -1/-1/-1->19->18 +gpub019:2611992:2612064 [3] NCCL INFO Channel 00/0 : 19[c7000] -> 20[7000] [send] via NET/IB/0 +gpub019:2611992:2612064 [3] NCCL INFO Channel 01/0 : 19[c7000] -> 20[7000] [send] via NET/IB/0 +gpub019:2611992:2612064 [3] NCCL INFO Connected all rings +gpub019:2611992:2612064 [3] NCCL INFO Channel 00/0 : 19[c7000] -> 18[85000] via P2P/IPC +gpub019:2611992:2612064 [3] NCCL INFO Channel 01/0 : 19[c7000] -> 18[85000] via P2P/IPC +gpub019:2611992:2612064 [3] NCCL INFO Connected all trees +gpub019:2611992:2612064 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub019:2611992:2612064 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub019:2611992:2612064 [3] NCCL INFO comm 0x4fcf2500 rank 19 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpub028:3104067:3104067 [0] NCCL INFO cudaDriverVersion 12010 +gpub028:3104067:3104067 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.128<0> +gpub028:3104067:3104067 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub028:3104067:3104152 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.128<0> +gpub028:3104067:3104152 [0] NCCL INFO Using network IB +gpub028:3104067:3104152 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub028:3104067:3104152 [0] NCCL INFO Trees [0] 25/28/-1->24->16 [1] 25/-1/-1->24->21 +gpub028:3104067:3104152 [0] NCCL INFO Channel 00/0 : 23[c7000] -> 24[7000] [receive] via NET/IB/0 +gpub028:3104067:3104152 [0] NCCL INFO Channel 01/0 : 23[c7000] -> 24[7000] [receive] via NET/IB/0 +gpub028:3104067:3104152 [0] NCCL INFO Channel 00/0 : 24[7000] -> 25[46000] via P2P/IPC +gpub028:3104067:3104152 [0] NCCL INFO Channel 01/0 : 24[7000] -> 25[46000] via P2P/IPC +gpub028:3104067:3104152 [0] NCCL INFO Connected all rings +gpub008:2789793:2789793 [0] NCCL INFO cudaDriverVersion 
12010
+gpub008:2789793:2789793 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.108<0>
+gpub008:2789793:2789793 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub008:2789793:2789871 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.108<0>
+gpub008:2789793:2789871 [0] NCCL INFO Using network IB
+gpub008:2789793:2789871 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000
+gpub008:2789793:2789871 [0] NCCL INFO Trees [0] 5/-1/-1->4->9 [1] 5/0/-1->4->12
+gpub008:2789793:2789871 [0] NCCL INFO Channel 00/0 : 3[c7000] -> 4[7000] [receive] via NET/IB/0
+gpub008:2789793:2789871 [0] NCCL INFO Channel 01/0 : 3[c7000] -> 4[7000] [receive] via NET/IB/0
+gpub008:2789793:2789871 [0] NCCL INFO Channel 00/0 : 4[7000] -> 5[46000] via P2P/IPC
+gpub008:2789793:2789871 [0] NCCL INFO Channel 01/0 : 4[7000] -> 5[46000] via P2P/IPC
+gpub008:2789793:2789871 [0] NCCL INFO Connected all rings
+gpub028:3104067:3104152 [0] NCCL INFO Channel 01/0 : 21[46000] -> 24[7000] [receive] via NET/IB/0
+gpub028:3104067:3104152 [0] NCCL INFO Channel 00/0 : 24[7000] -> 28[7000] [send] via NET/IB/0
+gpub028:3104067:3104152 [0] NCCL INFO Channel 00/0 : 16[7000] -> 24[7000] [receive] via NET/IB/0
+gpub028:3104067:3104152 [0] NCCL INFO Channel 00/0 : 24[7000] -> 16[7000] [send] via NET/IB/0
+gpub028:3104067:3104152 [0] NCCL INFO Channel 00/0 : 28[7000] -> 24[7000] [receive] via NET/IB/0
+gpub028:3104067:3104152 [0] NCCL INFO Channel 01/0 : 24[7000] -> 21[46000] [send] via NET/IB/0
+gpub028:3104067:3104152 [0] NCCL INFO Connected all trees
+gpub028:3104067:3104152 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub028:3104067:3104152 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub028:3104067:3104152 [0] NCCL INFO comm 0xa17fea0 rank 24 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE
+gpub008:2789793:2789871 [0] NCCL INFO Channel 01/0 : 0[7000] -> 4[7000] [receive] via NET/IB/0
+gpub008:2789793:2789871 [0] NCCL INFO Channel 00/0 : 4[7000] -> 9[46000] [send] via NET/IB/0
+gpub008:2789793:2789871 [0] NCCL INFO Channel 01/0 : 4[7000] -> 12[7000] [send] via NET/IB/0
+gpub008:2789793:2789871 [0] NCCL INFO Channel 01/0 : 12[7000] -> 4[7000] [receive] via NET/IB/0
+gpub008:2789793:2789871 [0] NCCL INFO Channel 00/0 : 9[46000] -> 4[7000] [receive] via NET/IB/0
+gpub008:2789793:2789871 [0] NCCL INFO Channel 01/0 : 4[7000] -> 0[7000] [send] via NET/IB/0
+gpub008:2789793:2789871 [0] NCCL INFO Connected all trees
+gpub008:2789793:2789871 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub008:2789793:2789871 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub008:2789793:2789871 [0] NCCL INFO comm 0x9e41e050 rank 4 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE
+gpub010:1746407:1746407 [0] NCCL INFO cudaDriverVersion 12010
+gpub010:1746407:1746407 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.110<0>
+gpub010:1746407:1746407 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub010:1746407:1746486 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.110<0>
+gpub010:1746407:1746486 [0] NCCL INFO Using network IB
+gpub010:1746407:1746486 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000
+gpub010:1746407:1746486 [0] NCCL INFO Trees [0] 9/12/-1->8->17 [1] 9/-1/-1->8->5
+gpub010:1746407:1746486 [0] NCCL INFO Channel 00/0 : 7[c7000] -> 8[7000] [receive] via NET/IB/0
+gpub010:1746407:1746486 [0] NCCL INFO Channel 01/0 : 7[c7000] -> 8[7000] [receive] via NET/IB/0
+gpub010:1746407:1746486 [0] NCCL INFO Channel 00/0 : 8[7000] -> 9[46000] via P2P/IPC
+gpub010:1746407:1746486 [0] NCCL INFO Channel 01/0 : 8[7000] -> 9[46000] via P2P/IPC
+gpub010:1746407:1746486 [0] NCCL INFO Connected all rings
+gpub010:1746407:1746486 [0] NCCL INFO Channel 01/0 : 5[46000] -> 8[7000] [receive] via NET/IB/0
+gpub010:1746407:1746486 [0] NCCL INFO Channel 00/0 : 8[7000] -> 12[7000] [send] via NET/IB/0
+gpub010:1746407:1746486 [0] NCCL INFO Channel 00/0 : 8[7000] -> 17[46000] [send] via NET/IB/0
+gpub010:1746407:1746486 [0] NCCL INFO Channel 00/0 : 17[46000] -> 8[7000] [receive] via NET/IB/0
+gpub010:1746407:1746486 [0] NCCL INFO Channel 00/0 : 12[7000] -> 8[7000] [receive] via NET/IB/0
+gpub010:1746407:1746486 [0] NCCL INFO Channel 01/0 : 8[7000] -> 5[46000] [send] via NET/IB/0
+gpub010:1746407:1746486 [0] NCCL INFO Connected all trees
+gpub010:1746407:1746486 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub010:1746407:1746486 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub010:1746407:1746486 [0] NCCL INFO comm 0xa1f0110 rank 8 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE
+gpub010:1746410:1746410 [3] NCCL INFO cudaDriverVersion 12010
+gpub010:1746410:1746410 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.110<0>
+gpub010:1746410:1746410 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub010:1746410:1746485 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.110<0>
+gpub010:1746410:1746485 [3] NCCL INFO Using network IB
+gpub010:1746410:1746485 [3] NCCL INFO Setting affinity for GPU 3 to ffff
+gpub010:1746410:1746485 [3] NCCL INFO Trees [0] -1/-1/-1->11->10 [1] -1/-1/-1->11->10
+gpub010:1746410:1746485 [3] NCCL INFO Channel 00/0 : 11[c7000] -> 12[7000] [send] via NET/IB/0
+gpub010:1746410:1746485 [3] NCCL INFO Channel 01/0 : 11[c7000] -> 12[7000] [send] via NET/IB/0
+gpub010:1746410:1746485 [3] NCCL INFO Connected all rings
+gpub010:1746410:1746485 [3] NCCL INFO Channel 00/0 : 11[c7000] -> 10[85000] via P2P/IPC
+gpub010:1746410:1746485 [3] NCCL INFO Channel 01/0 : 11[c7000] -> 10[85000] via P2P/IPC
+gpub010:1746410:1746485 [3] NCCL INFO Connected all trees
+gpub010:1746410:1746485 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub010:1746410:1746485 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub010:1746410:1746485 [3] NCCL INFO comm 0x95b8eb50 rank 11 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE
+gpub027:3834396:3834396 [0] NCCL INFO cudaDriverVersion 12010
+gpub027:3834396:3834396 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.127<0>
+gpub027:3834396:3834396 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub027:3834396:3834476 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.127<0>
+gpub027:3834396:3834476 [0] NCCL INFO Using network IB
+gpub027:3834396:3834476 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000
+gpub027:3834396:3834476 [0] NCCL INFO Trees [0] 21/-1/-1->20->25 [1] 21/16/-1->20->13
+gpub027:3834396:3834476 [0] NCCL INFO Channel 00/0 : 19[c7000] -> 20[7000] [receive] via NET/IB/0
+gpub027:3834396:3834476 [0] NCCL INFO Channel 01/0 : 19[c7000] -> 20[7000] [receive] via NET/IB/0
+gpub027:3834396:3834476 [0] NCCL INFO Channel 00/0 : 20[7000] -> 21[46000] via P2P/IPC
+gpub027:3834396:3834476 [0] NCCL INFO Channel 01/0 : 20[7000] -> 21[46000] via P2P/IPC
+gpub027:3834396:3834476 [0] NCCL INFO Connected all rings
+gpub027:3834396:3834476 [0] NCCL INFO Channel 01/0 : 16[7000] -> 20[7000] [receive] via NET/IB/0
+gpub027:3834396:3834476 [0] NCCL INFO Channel 00/0 : 20[7000] -> 25[46000] [send] via NET/IB/0
+gpub027:3834396:3834476 [0] NCCL INFO Channel 01/0 : 13[46000] -> 20[7000] [receive] via NET/IB/0
+gpub027:3834396:3834476 [0] NCCL INFO Channel 01/0 : 20[7000] -> 13[46000] [send] via NET/IB/0
+gpub027:3834396:3834476 [0] NCCL INFO Channel 00/0 : 25[46000] -> 20[7000] [receive] via NET/IB/0
+gpub027:3834396:3834476 [0] NCCL INFO Channel 01/0 : 20[7000] -> 16[7000] [send] via NET/IB/0
+gpub027:3834396:3834476 [0] NCCL INFO Connected all trees
+gpub027:3834396:3834476 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub027:3834396:3834476 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub027:3834396:3834476 [0] NCCL INFO comm 0x8b8afd50 rank 20 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE
+gpub019:2611990:2611990 [1] NCCL INFO cudaDriverVersion 12010
+gpub019:2611990:2611990 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.119<0>
+gpub019:2611990:2611990 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub019:2611990:2612063 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.119<0>
+gpub019:2611990:2612063 [1] NCCL INFO Using network IB
+gpub019:2611990:2612063 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000
+gpub019:2611990:2612063 [1] NCCL INFO Trees [0] 18/8/-1->17->16 [1] 18/-1/-1->17->16
+gpub019:2611990:2612063 [1] NCCL INFO Channel 00/0 : 17[46000] -> 18[85000] via P2P/IPC
+gpub019:2611990:2612063 [1] NCCL INFO Channel 01/0 : 17[46000] -> 18[85000] via P2P/IPC
+gpub019:2611990:2612063 [1] NCCL INFO Connected all rings
+gpub019:2611990:2612063 [1] NCCL INFO Channel 00/0 : 8[7000] -> 17[46000] [receive] via NET/IB/0
+gpub019:2611990:2612063 [1] NCCL INFO Channel 00/0 : 17[46000] -> 8[7000] [send] via NET/IB/0
+gpub019:2611990:2612063 [1] NCCL INFO Channel 00/0 : 17[46000] -> 16[7000] via P2P/IPC
+gpub019:2611990:2612063 [1] NCCL INFO Channel 01/0 : 17[46000] -> 16[7000] via P2P/IPC
+gpub019:2611990:2612063 [1] NCCL INFO Connected all trees
+gpub019:2611990:2612063 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub019:2611990:2612063 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub019:2611990:2612063 [1] NCCL INFO comm 0x8916a60 rank 17 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE
+gpub030:2867869:2867869 [0] NCCL INFO cudaDriverVersion 12010
+gpub030:2867869:2867869 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.130<0>
+gpub030:2867869:2867869 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub030:2867869:2867948 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.130<0>
+gpub030:2867869:2867948 [0] NCCL INFO Using network IB
+gpub030:2867869:2867948 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000
+gpub030:2867869:2867948 [0] NCCL INFO Trees [0] 29/-1/-1->28->24 [1] 29/12/-1->28->60
+gpub030:2867869:2867948 [0] NCCL INFO Channel 00/0 : 27[c7000] -> 28[7000] [receive] via NET/IB/0
+gpub030:2867869:2867948 [0] NCCL INFO Channel 01/0 : 27[c7000] -> 28[7000] [receive] via NET/IB/0
+gpub030:2867869:2867948 [0] NCCL INFO Channel 00/0 : 28[7000] -> 29[46000] via P2P/IPC
+gpub030:2867869:2867948 [0] NCCL INFO Channel 01/0 : 28[7000] -> 29[46000] via P2P/IPC
+gpub030:2867869:2867948 [0] NCCL INFO Connected all rings
+gpub030:2867869:2867948 [0] NCCL INFO Channel 00/0 : 24[7000] -> 28[7000] [receive] via NET/IB/0
+gpub030:2867869:2867948 [0] NCCL INFO Channel 01/0 : 12[7000] -> 28[7000] [receive] via NET/IB/0
+gpub030:2867869:2867948 [0] NCCL INFO Channel 01/0 : 60[7000] -> 28[7000] [receive] via NET/IB/0
+gpub030:2867869:2867948 [0] NCCL INFO Channel 01/0 : 28[7000] -> 60[7000] [send] via NET/IB/0
+gpub030:2867869:2867948 [0] NCCL INFO Channel 01/0 : 28[7000] -> 12[7000] [send] via NET/IB/0
+gpub030:2867869:2867948 [0] NCCL INFO Channel 00/0 : 28[7000] -> 24[7000] [send] via NET/IB/0
+gpub030:2867869:2867948 [0] NCCL INFO Connected all trees
+gpub030:2867869:2867948 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub030:2867869:2867948 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub030:2867869:2867948 [0] NCCL INFO comm 0x236c1590 rank 28 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE
+gpub074:3855654:3855654 [2] NCCL INFO cudaDriverVersion 12010
+gpub074:3855654:3855654 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.174<0>
+gpub074:3855654:3855654 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub074:3855654:3855724 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.174<0>
+gpub074:3855654:3855724 [2] NCCL INFO Using network IB
+gpub074:3855654:3855724 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000
+gpub074:3855654:3855724 [2] NCCL INFO Trees [0] 55/-1/-1->54->53 [1] 55/-1/-1->54->53
+gpub074:3855654:3855724 [2] NCCL INFO Channel 00/0 : 54[85000] -> 55[c7000] via P2P/IPC
+gpub074:3855654:3855724 [2] NCCL INFO Channel 01/0 : 54[85000] -> 55[c7000] via P2P/IPC
+gpub074:3855654:3855724 [2] NCCL INFO Connected all rings
+gpub074:3855654:3855724 [2] NCCL INFO Channel 00/0 : 54[85000] -> 53[46000] via P2P/IPC
+gpub074:3855654:3855724 [2] NCCL INFO Channel 01/0 : 54[85000] -> 53[46000] via P2P/IPC
+gpub074:3855654:3855724 [2] NCCL INFO Connected all trees
+gpub074:3855654:3855724 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub074:3855654:3855724 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub074:3855654:3855724 [2] NCCL INFO comm 0xba937820 rank 54 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE
+gpub030:2867872:2867872 [3] NCCL INFO cudaDriverVersion 12010
+gpub030:2867872:2867872 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.130<0>
+gpub030:2867872:2867872 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub030:2867872:2867950 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.130<0>
+gpub030:2867872:2867950 [3] NCCL INFO Using network IB
+gpub030:2867872:2867950 [3] NCCL INFO Setting affinity for GPU 3 to ffff
+gpub030:2867872:2867950 [3] NCCL INFO Trees [0] -1/-1/-1->31->30 [1] -1/-1/-1->31->30
+gpub030:2867872:2867950 [3] NCCL INFO Channel 00/0 : 31[c7000] -> 32[7000] [send] via NET/IB/0
+gpub030:2867872:2867950 [3] NCCL INFO Channel 01/0 : 31[c7000] -> 32[7000] [send] via NET/IB/0
+gpub030:2867872:2867950 [3] NCCL INFO Connected all rings
+gpub030:2867872:2867950 [3] NCCL INFO Channel 00/0 : 31[c7000] -> 30[85000] via P2P/IPC
+gpub030:2867872:2867950 [3] NCCL INFO Channel 01/0 : 31[c7000] -> 30[85000] via P2P/IPC
+gpub030:2867872:2867950 [3] NCCL INFO Connected all trees
+gpub030:2867872:2867950 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub030:2867872:2867950 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub030:2867872:2867950 [3] NCCL INFO comm 0x8db50450 rank 31 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE
+gpub078:387633:387633 [0] NCCL INFO cudaDriverVersion 12010
+gpub078:387633:387633 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.178<0>
+gpub078:387633:387633 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub078:387633:387710 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.178<0>
+gpub078:387633:387710 [0] NCCL INFO Using network IB
+gpub078:387633:387710 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000
+gpub078:387633:387710 [0] NCCL INFO Trees [0] 57/60/-1->56->48 [1] 57/-1/-1->56->53
+gpub078:387633:387710 [0] NCCL INFO Channel 00/0 : 55[c7000] -> 56[7000] [receive] via NET/IB/0
+gpub078:387633:387710 [0] NCCL INFO Channel 01/0 : 55[c7000] -> 56[7000] [receive] via NET/IB/0
+gpub078:387633:387710 [0] NCCL INFO Channel 00/0 : 56[7000] -> 57[46000] via P2P/IPC
+gpub078:387633:387710 [0] NCCL INFO Channel 01/0 : 56[7000] -> 57[46000] via P2P/IPC
+gpub078:387633:387710 [0] NCCL INFO Connected all rings
+gpub078:387633:387710 [0] NCCL INFO Channel 01/0 : 53[46000] -> 56[7000] [receive] via NET/IB/0
+gpub078:387633:387710 [0] NCCL INFO Channel 00/0 : 56[7000] -> 60[7000] [send] via NET/IB/0
+gpub078:387633:387710 [0] NCCL INFO Channel 00/0 : 48[7000] -> 56[7000] [receive] via NET/IB/0
+gpub078:387633:387710 [0] NCCL INFO Channel 00/0 : 56[7000] -> 48[7000] [send] via NET/IB/0
+gpub078:387633:387710 [0] NCCL INFO Channel 00/0 : 60[7000] -> 56[7000] [receive] via NET/IB/0
+gpub078:387633:387710 [0] NCCL INFO Channel 01/0 : 56[7000] -> 53[46000] [send] via NET/IB/0
+gpub078:387633:387710 [0] NCCL INFO Connected all trees
+gpub078:387633:387710 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub078:387633:387710 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub078:387633:387710 [0] NCCL INFO comm 0x8b083970 rank 56 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE
+gpub030:2867870:2867870 [1] NCCL INFO cudaDriverVersion 12010
+gpub030:2867870:2867870 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.130<0>
+gpub030:2867870:2867870 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub030:2867870:2867949 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.130<0>
+gpub030:2867870:2867949 [1] NCCL INFO Using network IB
+gpub030:2867870:2867949 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000
+gpub030:2867870:2867949 [1] NCCL INFO Trees [0] 30/-1/-1->29->28 [1] 30/44/-1->29->28
+gpub030:2867870:2867949 [1] NCCL INFO Channel 00/0 : 29[46000] -> 30[85000] via P2P/IPC
+gpub030:2867870:2867949 [1] NCCL INFO Channel 01/0 : 29[46000] -> 30[85000] via P2P/IPC
+gpub030:2867870:2867949 [1] NCCL INFO Connected all rings
+gpub030:2867870:2867949 [1] NCCL INFO Channel 01/0 : 29[46000] -> 44[7000] [send] via NET/IB/0
+gpub030:2867870:2867949 [1] NCCL INFO Channel 01/0 : 44[7000] -> 29[46000] [receive] via NET/IB/0
+gpub030:2867870:2867949 [1] NCCL INFO Channel 00/0 : 29[46000] -> 28[7000] via P2P/IPC
+gpub030:2867870:2867949 [1] NCCL INFO Channel 01/0 : 29[46000] -> 28[7000] via P2P/IPC
+gpub030:2867870:2867949 [1] NCCL INFO Connected all trees
+gpub030:2867870:2867949 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub030:2867870:2867949 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub030:2867870:2867949 [1] NCCL INFO comm 0x9c29c010 rank 29 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE
+gpub008:2789796:2789796 [3] NCCL INFO cudaDriverVersion 12010
+gpub008:2789796:2789796 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.108<0>
+gpub008:2789796:2789796 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub008:2789796:2789872 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.108<0>
+gpub008:2789796:2789872 [3] NCCL INFO Using network IB
+gpub008:2789796:2789872 [3] NCCL INFO Setting affinity for GPU 3 to ffff
+gpub008:2789796:2789872 [3] NCCL INFO Trees [0] -1/-1/-1->7->6 [1] -1/-1/-1->7->6
+gpub008:2789796:2789872 [3] NCCL INFO Channel 00/0 : 7[c7000] -> 8[7000] [send] via NET/IB/0
+gpub008:2789796:2789872 [3] NCCL INFO Channel 01/0 : 7[c7000] -> 8[7000] [send] via NET/IB/0
+gpub008:2789796:2789872 [3] NCCL INFO Connected all rings
+gpub008:2789796:2789872 [3] NCCL INFO Channel 00/0 : 7[c7000] -> 6[85000] via P2P/IPC
+gpub008:2789796:2789872 [3] NCCL INFO Channel 01/0 : 7[c7000] -> 6[85000] via P2P/IPC
+gpub008:2789796:2789872 [3] NCCL INFO Connected all trees
+gpub008:2789796:2789872 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub008:2789796:2789872 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub008:2789796:2789872 [3] NCCL INFO comm 0x50597af0 rank 7 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE
+gpub053:2037082:2037082 [0] NCCL INFO cudaDriverVersion 12010
+gpub053:2037082:2037082 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.153<0>
+gpub053:2037082:2037082 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub053:2037082:2037160 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.153<0>
+gpub053:2037082:2037160 [0] NCCL INFO Using network IB
+gpub053:2037082:2037160 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000
+gpub053:2037082:2037160 [0] NCCL INFO Trees [0] 45/-1/-1->44->40 [1] 45/36/-1->44->29
+gpub053:2037082:2037160 [0] NCCL INFO Channel 00/0 : 43[c7000] -> 44[7000] [receive] via NET/IB/0
+gpub053:2037082:2037160 [0] NCCL INFO Channel 01/0 : 43[c7000] -> 44[7000] [receive] via NET/IB/0
+gpub053:2037082:2037160 [0] NCCL INFO Channel 00/0 : 44[7000] -> 45[46000] via P2P/IPC
+gpub053:2037082:2037160 [0] NCCL INFO Channel 01/0 : 44[7000] -> 45[46000] via P2P/IPC
+gpub053:2037082:2037160 [0] NCCL INFO Connected all rings
+gpub053:2037082:2037160 [0] NCCL INFO Channel 00/0 : 40[7000] -> 44[7000] [receive] via NET/IB/0
+gpub053:2037082:2037160 [0] NCCL INFO Channel 01/0 : 36[7000] -> 44[7000] [receive] via NET/IB/0
+gpub053:2037082:2037160 [0] NCCL INFO Channel 01/0 : 29[46000] -> 44[7000] [receive] via NET/IB/0
+gpub053:2037082:2037160 [0] NCCL INFO Channel 01/0 : 44[7000] -> 29[46000] [send] via NET/IB/0
+gpub053:2037082:2037160 [0] NCCL INFO Channel 01/0 : 44[7000] -> 36[7000] [send] via NET/IB/0
+gpub053:2037082:2037160 [0] NCCL INFO Channel 00/0 : 44[7000] -> 40[7000] [send] via NET/IB/0
+gpub053:2037082:2037160 [0] NCCL INFO Connected all trees
+gpub053:2037082:2037160 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub053:2037082:2037160 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub053:2037082:2037160 [0] NCCL INFO comm 0x50aa6090 rank 44 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE
+gpub027:3834399:3834399 [3] NCCL INFO cudaDriverVersion 12010
+gpub027:3834399:3834399 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.127<0>
+gpub027:3834399:3834399 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub027:3834399:3834474 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.127<0>
+gpub027:3834399:3834474 [3] NCCL INFO Using network IB
+gpub027:3834399:3834474 [3] NCCL INFO Setting affinity for GPU 3 to ffff
+gpub027:3834399:3834474 [3] NCCL INFO Trees [0] -1/-1/-1->23->22 [1] -1/-1/-1->23->22
+gpub027:3834399:3834474 [3] NCCL INFO Channel 00/0 : 23[c7000] -> 24[7000] [send] via NET/IB/0
+gpub027:3834399:3834474 [3] NCCL INFO Channel 01/0 : 23[c7000] -> 24[7000] [send] via NET/IB/0
+gpub027:3834399:3834474 [3] NCCL INFO Connected all rings
+gpub027:3834399:3834474 [3] NCCL INFO Channel 00/0 : 23[c7000] -> 22[85000] via P2P/IPC
+gpub027:3834399:3834474 [3] NCCL INFO Channel 01/0 : 23[c7000] -> 22[85000] via P2P/IPC
+gpub027:3834399:3834474 [3] NCCL INFO Connected all trees
+gpub027:3834399:3834474 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub027:3834399:3834474 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub027:3834399:3834474 [3] NCCL INFO comm 0x8f1f3890 rank 23 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE
+gpub053:2037083:2037083 [1] NCCL INFO cudaDriverVersion 12010
+gpub053:2037083:2037083 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.153<0>
+gpub053:2037083:2037083 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub053:2037083:2037161 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.153<0>
+gpub053:2037083:2037161 [1] NCCL INFO Using network IB
+gpub053:2037083:2037161 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000
+gpub053:2037083:2037161 [1] NCCL INFO Trees [0] 46/-1/-1->45->44 [1] 46/52/-1->45->44
+gpub053:2037083:2037161 [1] NCCL INFO Channel 00/0 : 45[46000] -> 46[85000] via P2P/IPC
+gpub053:2037083:2037161 [1] NCCL INFO Channel 01/0 : 45[46000] -> 46[85000] via P2P/IPC
+gpub053:2037083:2037161 [1] NCCL INFO Connected all rings
+gpub053:2037083:2037161 [1] NCCL INFO Channel 01/0 : 45[46000] -> 52[7000] [send] via NET/IB/0
+gpub053:2037083:2037161 [1] NCCL INFO Channel 01/0 : 52[7000] -> 45[46000] [receive] via NET/IB/0
+gpub053:2037083:2037161 [1] NCCL INFO Channel 00/0 : 45[46000] -> 44[7000] via P2P/IPC
+gpub053:2037083:2037161 [1] NCCL INFO Channel 01/0 : 45[46000] -> 44[7000] via P2P/IPC
+gpub053:2037083:2037161 [1] NCCL INFO Connected all trees
+gpub053:2037083:2037161 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub053:2037083:2037161 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub053:2037083:2037161 [1] NCCL INFO comm 0x4f89c530 rank 45 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE
+gpub008:2789795:2789795 [2] NCCL INFO cudaDriverVersion 12010
+gpub008:2789795:2789795 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.108<0>
+gpub008:2789795:2789795 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub008:2789795:2789874 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.108<0>
+gpub008:2789795:2789874 [2] NCCL INFO Using network IB
+gpub008:2789795:2789874 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000
+gpub008:2789795:2789874 [2] NCCL INFO Trees [0] 7/-1/-1->6->5 [1] 7/-1/-1->6->5
+gpub008:2789795:2789874 [2] NCCL INFO Channel 00/0 : 6[85000] -> 7[c7000] via P2P/IPC
+gpub008:2789795:2789874 [2] NCCL INFO Channel 01/0 : 6[85000] -> 7[c7000] via P2P/IPC
+gpub008:2789795:2789874 [2] NCCL INFO Connected all rings
+gpub008:2789795:2789874 [2] NCCL INFO Channel 00/0 : 6[85000] -> 5[46000] via P2P/IPC
+gpub008:2789795:2789874 [2] NCCL INFO Channel 01/0 : 6[85000] -> 5[46000] via P2P/IPC
+gpub008:2789795:2789874 [2] NCCL INFO Connected all trees
+gpub008:2789795:2789874 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub008:2789795:2789874 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub008:2789795:2789874 [2] NCCL INFO comm 0xb7cc7790 rank 6 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE
+gpub078:387636:387636 [3] NCCL INFO cudaDriverVersion 12010
+gpub078:387636:387636 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.178<0>
+gpub078:387636:387636 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub078:387636:387711 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.178<0>
+gpub078:387636:387711 [3] NCCL INFO Using network IB
+gpub078:387636:387711 [3] NCCL INFO Setting affinity for GPU 3 to ffff
+gpub078:387636:387711 [3] NCCL INFO Trees [0] -1/-1/-1->59->58 [1] -1/-1/-1->59->58
+gpub078:387636:387711 [3] NCCL INFO Channel 00/0 : 59[c7000] -> 60[7000] [send] via NET/IB/0
+gpub078:387636:387711 [3] NCCL INFO Channel 01/0 : 59[c7000] -> 60[7000] [send] via NET/IB/0
+gpub078:387636:387711 [3] NCCL INFO Connected all rings
+gpub078:387636:387711 [3] NCCL INFO Channel 00/0 : 59[c7000] -> 58[85000] via P2P/IPC
+gpub078:387636:387711 [3] NCCL INFO Channel 01/0 : 59[c7000] -> 58[85000] via P2P/IPC
+gpub078:387636:387711 [3] NCCL INFO Connected all trees
+gpub078:387636:387711 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub078:387636:387711 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub078:387636:387711 [3] NCCL INFO comm 0x50bf4280 rank 59 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE
+gpub073:748599:748599 [2] NCCL INFO cudaDriverVersion 12010
+gpub073:748599:748599 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.173<0>
+gpub073:748599:748599 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub073:748599:748672 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.173<0>
+gpub073:748599:748672 [2] NCCL INFO Using network IB
+gpub073:748599:748672 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000
+gpub073:748599:748672 [2] NCCL INFO Trees [0] 51/-1/-1->50->49 [1] 51/-1/-1->50->49
+gpub073:748599:748672 [2] NCCL INFO Channel 00/0 : 50[85000] -> 51[c7000] via P2P/IPC
+gpub073:748599:748672 [2] NCCL INFO Channel 01/0 : 50[85000] -> 51[c7000] via P2P/IPC
+gpub073:748599:748672 [2] NCCL INFO Connected all rings
+gpub073:748599:748672 [2] NCCL INFO Channel 00/0 : 50[85000] -> 49[46000] via P2P/IPC
+gpub073:748599:748672 [2] NCCL INFO Channel 01/0 : 50[85000] -> 49[46000] via P2P/IPC
+gpub073:748599:748672 [2] NCCL INFO Connected all trees
+gpub073:748599:748672 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub073:748599:748672 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub073:748599:748672 [2] NCCL INFO comm 0xa2d1650 rank 50 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE
+gpub053:2037084:2037084 [2] NCCL INFO cudaDriverVersion 12010
+gpub053:2037084:2037084 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.153<0>
+gpub053:2037084:2037084 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub053:2037084:2037163 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.153<0>
+gpub053:2037084:2037163 [2] NCCL INFO Using network IB
+gpub053:2037084:2037163 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000
+gpub053:2037084:2037163 [2] NCCL INFO Trees [0] 47/-1/-1->46->45 [1] 47/-1/-1->46->45
+gpub053:2037084:2037163 [2] NCCL INFO Channel 00/0 : 46[85000] -> 47[c7000] via P2P/IPC
+gpub053:2037084:2037163 [2] NCCL INFO Channel 01/0 : 46[85000] -> 47[c7000] via P2P/IPC
+gpub053:2037084:2037163 [2] NCCL INFO Connected all rings
+gpub053:2037084:2037163 [2] NCCL INFO Channel 00/0 : 46[85000] -> 45[46000] via P2P/IPC
+gpub053:2037084:2037163 [2] NCCL INFO Channel 01/0 : 46[85000] -> 45[46000] via P2P/IPC
+gpub053:2037084:2037163 [2] NCCL INFO Connected all trees
+gpub053:2037084:2037163 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub053:2037084:2037163 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub053:2037084:2037163 [2] NCCL INFO comm 0x8c08e1a0 rank 46 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE
+gpub078:387635:387635 [2] NCCL INFO cudaDriverVersion 12010
+gpub078:387635:387635 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.178<0>
+gpub078:387635:387635 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub078:387635:387713 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.178<0>
+gpub078:387635:387713 [2] NCCL INFO Using network IB
+gpub078:387635:387713 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000
+gpub078:387635:387713 [2] NCCL INFO Trees [0] 59/-1/-1->58->57 [1] 59/-1/-1->58->57
+gpub078:387635:387713 [2] NCCL INFO Channel 00/0 : 58[85000] -> 59[c7000] via P2P/IPC
+gpub078:387635:387713 [2] NCCL INFO Channel 01/0 : 58[85000] -> 59[c7000] via P2P/IPC
+gpub078:387635:387713 [2] NCCL INFO Connected all rings
+gpub078:387635:387713 [2] NCCL INFO Channel 00/0 : 58[85000] -> 57[46000] via P2P/IPC
+gpub078:387635:387713 [2] NCCL INFO Channel 01/0 : 58[85000] -> 57[46000] via P2P/IPC
+gpub078:387635:387713 [2] NCCL INFO Connected all trees
+gpub078:387635:387713 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub078:387635:387713 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub078:387635:387713 [2] NCCL INFO comm 0x9a633940 rank 58 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE
+gpub028:3104070:3104070 [3] NCCL INFO cudaDriverVersion 12010
+gpub028:3104070:3104070 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.128<0>
+gpub028:3104070:3104070 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub028:3104070:3104149 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.128<0>
+gpub028:3104070:3104149 [3] NCCL INFO Using network IB
+gpub028:3104070:3104149 [3] NCCL INFO Setting affinity for GPU 3 to ffff
+gpub028:3104070:3104149 [3] NCCL INFO Trees [0] -1/-1/-1->27->26 [1] -1/-1/-1->27->26
+gpub028:3104070:3104149 [3] NCCL INFO Channel 00/0 : 27[c7000] -> 28[7000] [send] via NET/IB/0
+gpub028:3104070:3104149 [3] NCCL INFO Channel 01/0 : 27[c7000] -> 28[7000] [send] via NET/IB/0
+gpub028:3104070:3104149 [3] NCCL INFO Connected all rings
+gpub028:3104070:3104149 [3] NCCL INFO Channel 00/0 : 27[c7000] -> 26[85000] via P2P/IPC
+gpub028:3104070:3104149 [3] NCCL INFO Channel 01/0 : 27[c7000] -> 26[85000] via P2P/IPC
+gpub028:3104070:3104149 [3] NCCL INFO Connected all trees
+gpub028:3104070:3104149 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub028:3104070:3104149 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub028:3104070:3104149 [3] NCCL INFO comm 0xb81c6b50 rank 27 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE
+gpub053:2037085:2037085 [3] NCCL INFO cudaDriverVersion 12010
+gpub053:2037085:2037085 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.153<0>
+gpub053:2037085:2037085 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub053:2037085:2037162 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.153<0>
+gpub053:2037085:2037162 [3] NCCL INFO Using network IB
+gpub053:2037085:2037162 [3] NCCL INFO Setting affinity for GPU 3 to ffff
+gpub053:2037085:2037162 [3] NCCL INFO Trees [0] -1/-1/-1->47->46 [1] -1/-1/-1->47->46
+gpub053:2037085:2037162 [3] NCCL INFO Channel 00/0 : 47[c7000] -> 48[7000] [send] via NET/IB/0
+gpub053:2037085:2037162 [3] NCCL INFO Channel 01/0 : 47[c7000] -> 48[7000] [send] via NET/IB/0
+gpub053:2037085:2037162 [3] NCCL INFO Connected all rings
+gpub053:2037085:2037162 [3] NCCL INFO Channel 00/0 : 47[c7000] -> 46[85000] via P2P/IPC
+gpub053:2037085:2037162 [3] NCCL INFO Channel 01/0 : 47[c7000] -> 46[85000] via P2P/IPC
+gpub053:2037085:2037162 [3] NCCL INFO Connected all trees
+gpub053:2037085:2037162 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub053:2037085:2037162 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub053:2037085:2037162 [3] NCCL INFO comm 0x5026aaa0 rank 47 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE
+gpub050:2539553:2539553 [1] NCCL INFO cudaDriverVersion 12010
+gpub050:2539553:2539553 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.150<0>
+gpub050:2539553:2539553 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub050:2539553:2539629 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.150<0>
+gpub050:2539553:2539629 [1] NCCL INFO Using network IB
+gpub050:2539553:2539629 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000
+gpub050:2539553:2539629 [1] NCCL INFO Trees [0] 34/16/-1->33->32 [1] 34/-1/-1->33->32
+gpub050:2539553:2539629 [1] NCCL INFO Channel 00/0 : 33[46000] -> 34[85000] via P2P/IPC
+gpub050:2539553:2539629 [1] NCCL INFO Channel 01/0 : 33[46000] -> 34[85000] via P2P/IPC
+gpub050:2539553:2539629 [1] NCCL INFO Connected all rings
+gpub050:2539553:2539629 [1] NCCL INFO Channel 00/0 : 16[7000] -> 33[46000] [receive] via NET/IB/0
+gpub050:2539553:2539629 [1] NCCL INFO Channel 00/0 : 33[46000] -> 16[7000] [send] via NET/IB/0
+gpub050:2539553:2539629 [1] NCCL INFO Channel 00/0 : 33[46000] -> 32[7000] via P2P/IPC
+gpub050:2539553:2539629 [1] NCCL INFO Channel 01/0 : 33[46000] -> 32[7000] via P2P/IPC
+gpub050:2539553:2539629 [1] NCCL INFO Connected all trees
+gpub050:2539553:2539629 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub050:2539553:2539629 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub050:2539553:2539629 [1] NCCL INFO comm 0xa4859b0 rank 33 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE
+gpub027:3834397:3834397 [1] NCCL INFO cudaDriverVersion 12010
+gpub027:3834397:3834397 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.127<0>
+gpub027:3834397:3834397 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub027:3834397:3834475 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.127<0>
+gpub027:3834397:3834475 [1] NCCL INFO Using network IB
+gpub027:3834397:3834475 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000
+gpub027:3834397:3834475 [1] NCCL INFO Trees [0] 22/-1/-1->21->20 [1] 22/24/-1->21->20
+gpub027:3834397:3834475 [1] NCCL INFO Channel 00/0 : 21[46000] -> 22[85000] via P2P/IPC
+gpub027:3834397:3834475 [1] NCCL INFO Channel 01/0 : 21[46000] -> 22[85000] via P2P/IPC
+gpub027:3834397:3834475 [1] NCCL INFO Connected all rings
+gpub027:3834397:3834475 [1] NCCL INFO Channel 01/0 : 21[46000] -> 24[7000] [send] via NET/IB/0
+gpub027:3834397:3834475 [1] NCCL INFO Channel 01/0 : 24[7000] -> 21[46000] [receive] via NET/IB/0
+gpub027:3834397:3834475 [1] NCCL INFO Channel 00/0 : 21[46000] -> 20[7000] via P2P/IPC
+gpub027:3834397:3834475 [1] NCCL INFO Channel 01/0 : 21[46000] -> 20[7000] via P2P/IPC
+gpub027:3834397:3834475 [1] NCCL INFO Connected all trees
+gpub027:3834397:3834475 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub027:3834397:3834475 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub027:3834397:3834475 [1] NCCL INFO comm 0x8ed34290 rank 21 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE
+gpub028:3104068:3104068 [1] NCCL INFO cudaDriverVersion 12010
+gpub028:3104068:3104068 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.128<0>
+gpub028:3104068:3104068 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub028:3104068:3104151 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.128<0>
+gpub028:3104068:3104151 [1] NCCL INFO Using network IB
+gpub028:3104068:3104151 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000
+gpub028:3104068:3104151 [1] NCCL INFO Trees [0] 26/20/-1->25->24 [1] 26/-1/-1->25->24
+gpub028:3104068:3104151 [1] NCCL INFO Channel 00/0 : 25[46000] -> 26[85000] via P2P/IPC
+gpub028:3104068:3104151 [1] NCCL INFO Channel 01/0 : 25[46000] -> 26[85000] via P2P/IPC
+gpub028:3104068:3104151 [1] NCCL INFO Connected all rings
+gpub028:3104068:3104151 [1] NCCL INFO Channel 00/0 : 20[7000] -> 25[46000] [receive] via NET/IB/0
+gpub028:3104068:3104151 [1] NCCL INFO Channel 00/0 : 25[46000] -> 20[7000] [send] via NET/IB/0
+gpub028:3104068:3104151 [1] NCCL INFO Channel 00/0 : 25[46000] -> 24[7000] via P2P/IPC
+gpub028:3104068:3104151 [1] NCCL INFO Channel 01/0 : 25[46000] -> 24[7000] via P2P/IPC
+gpub028:3104068:3104151 [1] NCCL INFO Connected all trees
+gpub028:3104068:3104151 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub028:3104068:3104151 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub028:3104068:3104151 [1] NCCL INFO comm 0xb8c85b80 rank 25 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE
+gpub052:2277064:2277064 [2] NCCL INFO cudaDriverVersion 12010
+gpub052:2277064:2277064 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.152<0>
+gpub052:2277064:2277064 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub052:2277064:2277141 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.152<0>
+gpub052:2277064:2277141 [2] NCCL INFO Using network IB
+gpub052:2277064:2277141 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000
+gpub052:2277064:2277141 [2] NCCL INFO Trees [0] 43/-1/-1->42->41 [1] 43/-1/-1->42->41
+gpub052:2277064:2277141 [2] NCCL INFO Channel 00/0 : 42[85000] -> 43[c7000] via P2P/IPC
+gpub052:2277064:2277141 [2] NCCL INFO Channel 01/0 : 42[85000] -> 43[c7000] via P2P/IPC
+gpub052:2277064:2277141 [2] NCCL INFO Connected all rings
+gpub052:2277064:2277141 [2] NCCL INFO Channel 00/0 : 42[85000] -> 41[46000] via P2P/IPC
+gpub052:2277064:2277141 [2] NCCL INFO Channel 01/0 : 42[85000] -> 41[46000] via P2P/IPC
+gpub052:2277064:2277141 [2] NCCL INFO Connected all trees
+gpub052:2277064:2277141 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub052:2277064:2277141 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub052:2277064:2277141 [2] NCCL INFO comm 0xa4d0c250 rank 42 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE
+gpub078:387634:387634 [1] NCCL INFO cudaDriverVersion 12010
+gpub078:387634:387634 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.178<0>
+gpub078:387634:387634 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub078:387634:387712 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.178<0>
+gpub078:387634:387712 [1] NCCL INFO Using network IB
+gpub078:387634:387712 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000
+gpub078:387634:387712 [1] NCCL INFO Trees [0] 58/52/-1->57->56 [1] 58/-1/-1->57->56
+gpub078:387634:387712 [1] NCCL INFO Channel 00/0 : 57[46000] -> 58[85000] via P2P/IPC
+gpub078:387634:387712 [1] NCCL INFO Channel 01/0 : 57[46000] -> 58[85000] via P2P/IPC
+gpub078:387634:387712 [1] NCCL INFO Connected all rings
+gpub078:387634:387712 [1] NCCL INFO Channel 00/0 : 52[7000] -> 57[46000] [receive] via NET/IB/0
+gpub078:387634:387712 [1] NCCL INFO Channel 00/0 : 57[46000] -> 52[7000] [send] via NET/IB/0
+gpub078:387634:387712 [1] NCCL INFO Channel 00/0 : 57[46000] -> 56[7000] via P2P/IPC
+gpub078:387634:387712 [1] NCCL INFO Channel 01/0 : 57[46000] -> 56[7000] via P2P/IPC
+gpub078:387634:387712 [1] NCCL INFO Connected all trees
+gpub078:387634:387712 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub078:387634:387712 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub078:387634:387712 [1] NCCL INFO comm 0xb893bfd0 rank 57 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE
+gpub051:3225329:3225329 [1] NCCL INFO cudaDriverVersion 12010
+gpub051:3225329:3225329 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.151<0>
+gpub051:3225329:3225329 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub051:3225329:3225407 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.151<0>
+gpub051:3225329:3225407 [1] NCCL INFO Using network IB
+gpub051:3225329:3225407 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000
+gpub051:3225329:3225407 [1] NCCL INFO Trees [0] 38/-1/-1->37->36 [1] 38/40/-1->37->36
+gpub051:3225329:3225407 [1] NCCL INFO Channel 00/0 : 37[46000] -> 38[85000] via P2P/IPC
+gpub051:3225329:3225407 [1] NCCL INFO Channel 01/0 : 37[46000] -> 38[85000] via P2P/IPC
+gpub051:3225329:3225407 [1] NCCL INFO Connected all rings
+gpub051:3225329:3225407 [1] NCCL INFO Channel 01/0 : 37[46000] -> 40[7000] [send] via NET/IB/0
+gpub051:3225329:3225407 [1] NCCL INFO Channel 01/0 : 40[7000] -> 37[46000] [receive] via NET/IB/0
+gpub084:95630:95630 [1] NCCL INFO cudaDriverVersion 12010
+gpub084:95630:95630 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.184<0>
+gpub084:95630:95630 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub084:95630:95713 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.184<0>
+gpub084:95630:95713 [1] NCCL INFO Using network IB
+gpub084:95630:95713 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000
+gpub084:95630:95713 [1] NCCL INFO Trees [0] 62/-1/-1->61->60 [1] 62/-1/-1->61->60
+gpub084:95630:95713 [1] NCCL INFO Channel 00/0 : 61[46000] -> 62[85000] via P2P/IPC
+gpub084:95630:95713 [1] NCCL INFO Channel 01/0 : 61[46000] -> 62[85000] via P2P/IPC
+gpub084:95630:95713 [1] NCCL INFO Connected all rings
+gpub084:95630:95713 [1] NCCL INFO Channel 00/0 : 61[46000] -> 60[7000] via P2P/IPC
+gpub084:95630:95713 [1] NCCL INFO Channel 01/0 : 61[46000] -> 60[7000] via P2P/IPC
+gpub084:95630:95713 [1] NCCL INFO Connected all trees
+gpub051:3225329:3225407 [1] NCCL INFO Channel 00/0 : 37[46000] -> 36[7000] via P2P/IPC
+gpub051:3225329:3225407 [1] NCCL INFO Channel 01/0 : 37[46000] -> 36[7000] via P2P/IPC
+gpub051:3225329:3225407 [1] NCCL INFO Connected all trees
+gpub051:3225329:3225407 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub051:3225329:3225407 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub051:3225329:3225407 [1] NCCL INFO comm 0xa2b18990 rank 37 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE
+gpub084:95630:95713 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub084:95630:95713 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub084:95630:95713 [1] NCCL INFO comm 0x505266b0 rank 61 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE
+gpub030:2867871:2867871 [2] NCCL INFO cudaDriverVersion 12010
+gpub030:2867871:2867871 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.130<0>
+gpub030:2867871:2867871 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub030:2867871:2867947 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.130<0>
+gpub030:2867871:2867947 [2] NCCL INFO Using network IB
+gpub030:2867871:2867947 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000
+gpub030:2867871:2867947 [2] NCCL INFO Trees [0] 31/-1/-1->30->29 [1] 31/-1/-1->30->29
+gpub030:2867871:2867947 [2] NCCL INFO Channel 00/0 : 30[85000] -> 31[c7000] via P2P/IPC
+gpub030:2867871:2867947 [2] NCCL INFO Channel 01/0 : 30[85000] -> 31[c7000] via P2P/IPC
+gpub030:2867871:2867947 [2] NCCL INFO Connected all rings
+gpub030:2867871:2867947 [2] NCCL INFO Channel 00/0 : 30[85000] -> 29[46000] via P2P/IPC
+gpub030:2867871:2867947 [2] NCCL INFO Channel 01/0 : 30[85000] -> 29[46000] via P2P/IPC
+gpub002:2108202:2108202 [3] NCCL INFO cudaDriverVersion 12010
+gpub002:2108202:2108202 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.102<0>
+gpub002:2108202:2108202 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub002:2108202:2108274 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.102<0>
+gpub002:2108202:2108274 [3] NCCL INFO Using network IB
+gpub002:2108202:2108274 [3] NCCL INFO Setting affinity for GPU 3 to ffff
+gpub002:2108202:2108274 [3] NCCL INFO Trees [0] -1/-1/-1->3->2 [1] -1/-1/-1->3->2
+gpub002:2108202:2108274 [3] NCCL INFO Channel 00/0 : 3[c7000] -> 4[7000] [send] via NET/IB/0
+gpub002:2108202:2108274 [3] NCCL INFO Channel 01/0 : 3[c7000] -> 4[7000] [send] via NET/IB/0
+gpub002:2108202:2108274 [3] NCCL INFO Connected all rings
+gpub002:2108202:2108274 [3] NCCL INFO Channel 00/0 : 3[c7000] -> 2[85000] via P2P/IPC
+gpub002:2108202:2108274 [3] NCCL INFO Channel 01/0 : 3[c7000] -> 2[85000] via P2P/IPC
+gpub030:2867871:2867947 [2] NCCL INFO Connected all trees
+gpub030:2867871:2867947 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub030:2867871:2867947 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub030:2867871:2867947 [2] NCCL INFO comm 0x516c8220 rank 30 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE
+gpub002:2108202:2108274 [3] NCCL INFO Connected all trees
+gpub002:2108202:2108274 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub002:2108202:2108274 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub002:2108202:2108274 [3] NCCL INFO comm 0xba66c350 rank 3 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE
+gpub028:3104069:3104069 [2] NCCL INFO cudaDriverVersion 12010
+gpub028:3104069:3104069 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.128<0>
+gpub028:3104069:3104069 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub028:3104069:3104150 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.128<0>
+gpub028:3104069:3104150 [2] NCCL INFO Using network IB
+gpub028:3104069:3104150 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000
+gpub028:3104069:3104150 [2] NCCL INFO Trees [0] 27/-1/-1->26->25 [1] 27/-1/-1->26->25
+gpub028:3104069:3104150 [2] NCCL INFO Channel 00/0 : 26[85000] -> 27[c7000] via P2P/IPC
+gpub028:3104069:3104150 [2] NCCL INFO Channel 01/0 : 26[85000] -> 27[c7000] via P2P/IPC
+gpub028:3104069:3104150 [2] NCCL INFO Connected all rings
+gpub028:3104069:3104150 [2] NCCL INFO Channel 00/0 : 26[85000] -> 25[46000] via P2P/IPC
+gpub028:3104069:3104150 [2] NCCL INFO Channel 01/0 : 26[85000] -> 25[46000] via P2P/IPC
+gpub028:3104069:3104150 [2] NCCL INFO Connected all trees
+gpub028:3104069:3104150 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub028:3104069:3104150 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub028:3104069:3104150 [2] NCCL INFO comm 0x50c3cd20 rank 26 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE
+gpub084:95629:95629 [0] NCCL INFO cudaDriverVersion 12010
+gpub084:95629:95629 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.184<0>
+gpub084:95629:95629 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub084:95629:95715 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.184<0>
+gpub084:95629:95715 [0] NCCL INFO Using network IB
+gpub084:95629:95715 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000
+gpub084:95629:95715 [0] NCCL INFO Trees [0] 61/-1/-1->60->56 [1] 61/28/-1->60->-1
+gpub084:95629:95715 [0] NCCL INFO Channel 00/0 : 59[c7000] -> 60[7000] [receive] via NET/IB/0
+gpub084:95629:95715 [0] NCCL INFO Channel 01/0 : 59[c7000] -> 60[7000] [receive] via NET/IB/0
+gpub084:95629:95715 [0] NCCL INFO Channel 00/0 : 60[7000] -> 61[46000] via P2P/IPC
+gpub084:95629:95715 [0] NCCL INFO Channel 01/0 : 60[7000] -> 61[46000] via P2P/IPC
+gpub084:95629:95715 [0] NCCL INFO Connected all rings
+gpub084:95629:95715 [0] NCCL INFO Channel 00/0 : 56[7000] -> 60[7000] [receive] via NET/IB/0
+gpub084:95629:95715 [0] NCCL INFO Channel 01/0 : 28[7000] -> 60[7000] [receive] via NET/IB/0
+gpub084:95629:95715 [0] NCCL INFO Channel 01/0 : 60[7000] -> 28[7000] [send] via NET/IB/0
+gpub084:95629:95715 [0] NCCL INFO Channel 00/0 : 60[7000] -> 56[7000] [send] via NET/IB/0
+gpub084:95629:95715 [0] NCCL INFO Connected all trees
+gpub084:95629:95715 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub084:95629:95715 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub084:95629:95715 [0] NCCL INFO comm 0x4f579950 rank 60 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE
+gpub050:2539555:2539555 [3] NCCL INFO cudaDriverVersion 12010
+gpub050:2539555:2539555 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.150<0>
+gpub050:2539555:2539555 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub050:2539555:2539630 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.150<0>
+gpub050:2539555:2539630 [3] NCCL INFO Using network IB
+gpub050:2539555:2539630 [3] NCCL INFO Setting affinity for GPU 3 to ffff
+gpub050:2539555:2539630 [3] NCCL INFO Trees [0] -1/-1/-1->35->34 [1] -1/-1/-1->35->34
+gpub050:2539555:2539630 [3] NCCL INFO Channel 00/0 : 35[c7000] -> 36[7000] [send] via NET/IB/0
+gpub050:2539555:2539630 [3] NCCL INFO Channel 01/0 : 35[c7000] -> 36[7000] [send] via NET/IB/0
+gpub050:2539555:2539630 [3] NCCL INFO Connected all rings
+gpub050:2539555:2539630 [3] NCCL INFO Channel 00/0 : 35[c7000] -> 34[85000] via P2P/IPC
+gpub050:2539555:2539630 [3] NCCL INFO Channel 01/0 : 35[c7000] -> 34[85000] via P2P/IPC
+gpub050:2539555:2539630 [3] NCCL INFO Connected all trees
+gpub050:2539555:2539630 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub050:2539555:2539630 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub050:2539555:2539630 [3] NCCL INFO comm 0xb939ca50 rank 35 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE
+gpub052:2277062:2277062 [0] NCCL INFO cudaDriverVersion 12010
+gpub052:2277062:2277062 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.152<0>
+gpub052:2277062:2277062 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub052:2277062:2277138 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.152<0>
+gpub052:2277062:2277138 [0] NCCL INFO Using network IB
+gpub052:2277062:2277138 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000
+gpub052:2277062:2277138 [0] NCCL INFO Trees [0] 41/44/-1->40->49 [1] 41/-1/-1->40->37
+gpub052:2277062:2277138 [0] NCCL INFO Channel 00/0 : 39[c7000] -> 40[7000] [receive] via NET/IB/0
+gpub052:2277062:2277138 [0] NCCL INFO Channel 01/0 : 39[c7000] -> 40[7000] [receive] via NET/IB/0
+gpub052:2277062:2277138 [0] NCCL INFO Channel 00/0 : 40[7000] -> 41[46000] via P2P/IPC
+gpub052:2277062:2277138 [0] NCCL INFO Channel 01/0 : 40[7000] -> 41[46000] via P2P/IPC
+gpub052:2277062:2277138 [0] NCCL INFO Connected all rings
+gpub052:2277062:2277138 [0] NCCL INFO Channel 01/0 : 37[46000] -> 40[7000] [receive] via NET/IB/0
+gpub052:2277062:2277138 [0] NCCL INFO Channel 00/0 : 40[7000] -> 44[7000] [send] via NET/IB/0
+gpub052:2277062:2277138 [0] NCCL INFO Channel 00/0 : 40[7000] -> 49[46000] [send] via NET/IB/0
+gpub052:2277062:2277138 [0] NCCL INFO Channel 00/0 : 49[46000] -> 40[7000] [receive] via NET/IB/0
+gpub052:2277062:2277138 [0] NCCL INFO Channel 00/0 : 44[7000] -> 40[7000] [receive] via NET/IB/0
+gpub052:2277062:2277138 [0] NCCL INFO Channel 01/0 : 40[7000] -> 37[46000] [send] via NET/IB/0
+gpub052:2277062:2277138 [0] NCCL INFO Connected all trees
+gpub052:2277062:2277138 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub052:2277062:2277138 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub052:2277062:2277138 [0] NCCL INFO comm 0x8b3e450 rank 40 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE
+gpub010:1746408:1746408 [1] NCCL INFO cudaDriverVersion 12010
+gpub010:1746408:1746408 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.110<0>
+gpub010:1746408:1746408 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub010:1746408:1746484 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.110<0>
+gpub010:1746408:1746484 [1] NCCL INFO Using network IB
+gpub010:1746408:1746484 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000
+gpub010:1746408:1746484 [1] NCCL INFO Trees [0] 10/4/-1->9->8 [1] 10/-1/-1->9->8
+gpub010:1746408:1746484 [1] NCCL INFO Channel 00/0 : 9[46000] -> 10[85000] via P2P/IPC
+gpub010:1746408:1746484 [1] NCCL INFO Channel 01/0 : 9[46000] -> 10[85000] via P2P/IPC
+gpub010:1746408:1746484 [1] NCCL INFO Connected all rings
+gpub010:1746408:1746484 [1] NCCL INFO Channel 00/0 : 4[7000] -> 9[46000] [receive] via NET/IB/0
+gpub010:1746408:1746484 [1] NCCL INFO Channel 00/0 : 9[46000] -> 4[7000] [send] via NET/IB/0
+gpub010:1746408:1746484 [1] NCCL INFO Channel 00/0 : 9[46000] -> 8[7000] via P2P/IPC
+gpub010:1746408:1746484 [1] NCCL INFO Channel 01/0 : 9[46000] -> 8[7000] via P2P/IPC
+gpub010:1746408:1746484 [1] NCCL INFO Connected all trees
+gpub010:1746408:1746484 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub010:1746408:1746484 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub010:1746408:1746484 [1] NCCL INFO comm 0xab889a50 rank 9 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE
+gpub010:1746409:1746409 [2] NCCL INFO cudaDriverVersion 12010
+gpub010:1746409:1746409 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.110<0>
+gpub010:1746409:1746409 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub010:1746409:1746487 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.110<0>
+gpub010:1746409:1746487 [2] NCCL INFO Using network IB
+gpub010:1746409:1746487 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000
+gpub010:1746409:1746487 [2] NCCL INFO Trees [0] 11/-1/-1->10->9 [1] 11/-1/-1->10->9
+gpub010:1746409:1746487 [2] NCCL INFO Channel 00/0 : 10[85000] -> 11[c7000] via P2P/IPC
+gpub010:1746409:1746487 [2] NCCL INFO Channel 01/0 : 10[85000] -> 11[c7000] via P2P/IPC
+gpub010:1746409:1746487 [2] NCCL INFO Connected all rings
+gpub010:1746409:1746487 [2] NCCL INFO Channel 00/0 : 10[85000] -> 9[46000] via P2P/IPC
+gpub010:1746409:1746487 [2] NCCL INFO Channel 01/0 : 10[85000] -> 9[46000] via P2P/IPC
+gpub010:1746409:1746487 [2] NCCL INFO Connected all trees
+gpub010:1746409:1746487 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub010:1746409:1746487 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub010:1746409:1746487 [2] NCCL INFO comm 0x8d5443e0 rank 10 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE
+gpub002:2108200:2108200 [1] NCCL INFO cudaDriverVersion 12010
+gpub002:2108200:2108200 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.102<0>
+gpub002:2108200:2108200 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub002:2108200:2108275 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.102<0>
+gpub002:2108200:2108275 [1] NCCL INFO Using network IB
+gpub002:2108200:2108275 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000
+gpub002:2108200:2108275 [1] NCCL INFO Trees [0] 2/-1/-1->1->0 [1] 2/-1/-1->1->0
+gpub002:2108200:2108275 [1] NCCL INFO Channel 00/0 : 1[46000] -> 2[85000] via P2P/IPC
+gpub002:2108200:2108275 [1] NCCL INFO Channel 01/0 : 1[46000] -> 2[85000] via P2P/IPC
+gpub002:2108200:2108275 [1] NCCL INFO Connected all rings
+gpub002:2108200:2108275 [1] NCCL INFO Channel 00/0 : 1[46000] -> 0[7000] via P2P/IPC
+gpub002:2108200:2108275 [1] NCCL INFO Channel 01/0 : 1[46000] -> 0[7000] via P2P/IPC
+gpub002:2108200:2108275 [1] NCCL INFO Connected all trees
+gpub002:2108200:2108275 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub002:2108200:2108275 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub002:2108200:2108275 [1] NCCL INFO comm 0x8e8ce8d0 rank 1 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE
+gpub002:2108199:2108273 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.102<0>
+gpub002:2108199:2108273 [0] NCCL INFO Using network IB
+gpub002:2108199:2108273 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000
+gpub002:2108199:2108273 [0] NCCL INFO Channel 00/02 : 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19
+gpub002:2108199:2108273 [0] NCCL INFO Channel 01/02 : 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19
+gpub002:2108199:2108273 [0] NCCL INFO Trees [0] 1/32/-1->0->-1 [1] 1/-1/-1->0->4
+gpub002:2108199:2108273 [0] NCCL INFO Channel 00/0 : 63[c7000] -> 0[7000] [receive] via NET/IB/0
+gpub002:2108199:2108273 [0] NCCL INFO Channel 01/0 : 63[c7000] -> 0[7000] [receive] via NET/IB/0
+gpub002:2108199:2108273 [0] NCCL INFO Channel 00/0 : 0[7000] -> 1[46000] via P2P/IPC
+gpub002:2108199:2108273 [0] NCCL INFO Channel 01/0 : 0[7000] -> 1[46000] via P2P/IPC
+gpub002:2108199:2108273 [0] NCCL INFO Connected all rings
+gpub002:2108199:2108273 [0] NCCL INFO Channel 01/0 : 0[7000] -> 4[7000] [send] via NET/IB/0
+gpub002:2108199:2108273 [0] NCCL INFO Channel 00/0 : 32[7000] -> 0[7000] [receive] via NET/IB/0
+gpub002:2108199:2108273 [0] NCCL INFO Channel 00/0 : 0[7000] -> 32[7000] [send] via NET/IB/0
+gpub002:2108199:2108273 [0] NCCL INFO Channel 01/0 : 4[7000] -> 0[7000] [receive] via NET/IB/0
+gpub002:2108199:2108273 [0] NCCL INFO Connected all trees
+gpub002:2108199:2108273 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub002:2108199:2108273 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub002:2108199:2108273 [0] NCCL INFO comm 0x8d0b120 rank 0 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE
+gpub073:748600:748600 [3] NCCL INFO cudaDriverVersion 12010
+gpub073:748600:748600 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.173<0>
+gpub073:748600:748600 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub073:748600:748671 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.173<0>
+gpub073:748600:748671 [3] NCCL INFO Using network IB
+gpub073:748600:748671 [3] NCCL INFO Setting affinity for GPU 3 to ffff
+gpub073:748600:748671 [3] NCCL INFO Trees [0] -1/-1/-1->51->50 [1] -1/-1/-1->51->50
+gpub073:748600:748671 [3] NCCL INFO Channel 00/0 : 51[c7000] -> 52[7000] [send] via NET/IB/0
+gpub073:748600:748671 [3] NCCL INFO Channel 01/0 : 51[c7000] -> 52[7000] [send] via NET/IB/0
+gpub073:748600:748671 [3] NCCL INFO Connected all rings
+gpub073:748600:748671 [3] NCCL INFO Channel 00/0 : 51[c7000] -> 50[85000] via P2P/IPC
+gpub073:748600:748671 [3] NCCL INFO Channel 01/0 : 51[c7000] -> 50[85000] via P2P/IPC
+gpub051:3225328:3225328 [0] NCCL INFO cudaDriverVersion 12010
+gpub051:3225328:3225328 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.151<0>
+gpub051:3225328:3225328 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub051:3225328:3225405 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.151<0>
+gpub051:3225328:3225405 [0] NCCL INFO Using network IB
+gpub051:3225328:3225405 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000
+gpub051:3225328:3225405 [0] NCCL INFO Trees [0] 37/-1/-1->36->41 [1] 37/32/-1->36->44
+gpub051:3225328:3225405 [0] NCCL INFO Channel 00/0 : 35[c7000] -> 36[7000] [receive] via NET/IB/0
+gpub051:3225328:3225405 [0] NCCL INFO Channel 01/0 : 35[c7000] -> 36[7000] [receive] via NET/IB/0
+gpub051:3225328:3225405 [0] NCCL INFO Channel 00/0 : 36[7000] -> 37[46000] via P2P/IPC
+gpub051:3225328:3225405 [0] NCCL INFO Channel 01/0 : 36[7000] -> 37[46000] via P2P/IPC
+gpub051:3225328:3225405 [0] NCCL INFO Connected all rings
+gpub073:748600:748671 [3] NCCL INFO Connected all trees
+gpub073:748600:748671 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub073:748600:748671 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub073:748600:748671 [3] NCCL INFO comm 0x4f8ebf60 rank 51 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE
+gpub051:3225328:3225405 [0] NCCL INFO Channel 01/0 : 32[7000] -> 36[7000] [receive] via NET/IB/0
+gpub051:3225328:3225405 [0] NCCL INFO Channel 00/0 : 36[7000] -> 41[46000] [send] via NET/IB/0
+gpub051:3225328:3225405 [0] NCCL INFO Channel 01/0 : 36[7000] -> 44[7000] [send] via NET/IB/0
+gpub051:3225328:3225405 [0] NCCL INFO Channel 01/0 : 44[7000] -> 36[7000] [receive] via NET/IB/0
+gpub051:3225328:3225405 [0] NCCL INFO Channel 00/0 : 41[46000] -> 36[7000] [receive] via NET/IB/0
+gpub051:3225328:3225405 [0] NCCL INFO Channel 01/0 : 36[7000] -> 32[7000] [send] via NET/IB/0
+gpub051:3225328:3225405 [0] NCCL INFO Connected all trees
+gpub051:3225328:3225405 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub051:3225328:3225405 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub051:3225328:3225405 [0] NCCL INFO comm 0x4f680190 rank 36 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE
+gpub050:2539554:2539554 [2] NCCL INFO cudaDriverVersion 12010
+gpub050:2539554:2539554 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.150<0>
+gpub050:2539554:2539554 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub050:2539554:2539627 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.150<0>
+gpub050:2539554:2539627 [2] NCCL INFO Using network IB
+gpub050:2539554:2539627 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000
+gpub050:2539554:2539627 [2] NCCL INFO Trees [0] 35/-1/-1->34->33 [1] 35/-1/-1->34->33
+gpub050:2539554:2539627 [2] NCCL INFO Channel 00/0 : 34[85000] -> 35[c7000] via P2P/IPC
+gpub050:2539554:2539627 [2] NCCL INFO Channel 01/0 : 34[85000] -> 35[c7000] via P2P/IPC
+gpub050:2539554:2539627 [2] NCCL INFO Connected all rings
+gpub050:2539554:2539627 [2] NCCL INFO Channel 00/0 : 34[85000] -> 33[46000] via P2P/IPC
+gpub050:2539554:2539627 [2] NCCL INFO Channel 01/0 : 34[85000] -> 33[46000] via P2P/IPC
+gpub050:2539554:2539627 [2] NCCL INFO Connected all trees
+gpub050:2539554:2539627 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub050:2539554:2539627 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub050:2539554:2539627 [2] NCCL INFO comm 0xa469b710 rank 34 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE
+gpub027:3834398:3834398 [2] NCCL INFO cudaDriverVersion 12010
+gpub027:3834398:3834398 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.127<0>
+gpub027:3834398:3834398 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub027:3834398:3834473 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.127<0>
+gpub027:3834398:3834473 [2] NCCL INFO Using network IB
+gpub027:3834398:3834473 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000
+gpub027:3834398:3834473 [2] NCCL INFO Trees [0] 23/-1/-1->22->21 [1] 23/-1/-1->22->21
+gpub027:3834398:3834473 [2] NCCL INFO Channel 00/0 : 22[85000] -> 23[c7000] via P2P/IPC
+gpub027:3834398:3834473 [2] NCCL INFO Channel 01/0 : 22[85000] -> 23[c7000] via P2P/IPC
+gpub027:3834398:3834473 [2] NCCL INFO Connected all rings
+gpub027:3834398:3834473 [2] NCCL INFO Channel 00/0 : 22[85000] -> 21[46000] via P2P/IPC
+gpub027:3834398:3834473 [2] NCCL INFO Channel 01/0 : 22[85000] -> 21[46000] via P2P/IPC
+gpub027:3834398:3834473 [2] NCCL INFO Connected all trees
+gpub027:3834398:3834473 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub027:3834398:3834473 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub027:3834398:3834473 [2] NCCL INFO comm 0x505e2640 rank 22 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE
+gpub073:748598:748598 [1] NCCL INFO cudaDriverVersion 12010
+gpub073:748598:748598 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.173<0>
+gpub073:748598:748598 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub073:748598:748673 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.173<0>
+gpub073:748598:748673 [1] NCCL INFO Using network IB
+gpub073:748598:748673 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000
+gpub073:748598:748673 [1] NCCL INFO Trees [0] 50/40/-1->49->48 [1] 50/-1/-1->49->48
+gpub073:748598:748673 [1] NCCL INFO Channel 00/0 : 49[46000] -> 50[85000] via P2P/IPC
+gpub073:748598:748673 [1] NCCL INFO Channel 01/0 : 49[46000] -> 50[85000] via P2P/IPC
+gpub073:748598:748673 [1] NCCL INFO Connected all rings
+gpub073:748598:748673 [1] NCCL INFO Channel 00/0 : 40[7000] -> 49[46000] [receive] via NET/IB/0
+gpub073:748598:748673 [1] NCCL INFO Channel 00/0 : 49[46000] -> 40[7000] [send] via NET/IB/0
+gpub073:748598:748673 [1] NCCL INFO Channel 00/0 : 49[46000] -> 48[7000] via P2P/IPC
+gpub073:748598:748673 [1] NCCL INFO Channel 01/0 : 49[46000] -> 48[7000] via P2P/IPC
+gpub073:748598:748673 [1] NCCL INFO Connected all trees
+gpub073:748598:748673 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub073:748598:748673 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub073:748598:748673 [1] NCCL INFO comm 0xb7883d00 rank 49 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE
+gpub073:748597:748597 [0] NCCL INFO cudaDriverVersion 12010
+gpub073:748597:748597 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.173<0>
+gpub073:748597:748597 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub073:748597:748674 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.173<0>
+gpub073:748597:748674 [0] NCCL INFO Using network IB
+gpub073:748597:748674 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000
+gpub073:748597:748674 [0] NCCL INFO Trees [0] 49/56/-1->48->32 [1] 49/-1/-1->48->52
+gpub073:748597:748674 [0] NCCL INFO Channel 00/0 : 47[c7000] -> 48[7000] [receive] via NET/IB/0
+gpub073:748597:748674 [0] NCCL INFO Channel 01/0 : 47[c7000] -> 48[7000] [receive] via NET/IB/0
+gpub073:748597:748674 [0] NCCL INFO Channel 00/0 : 48[7000] -> 49[46000] via P2P/IPC
+gpub073:748597:748674 [0] NCCL INFO Channel 01/0 : 48[7000] -> 49[46000] via P2P/IPC
+gpub073:748597:748674 [0] NCCL INFO Connected all rings
+gpub073:748597:748674 [0] NCCL INFO Channel 01/0 : 48[7000] -> 52[7000] [send] via NET/IB/0
+gpub073:748597:748674 [0] NCCL INFO Channel 00/0 : 48[7000] -> 56[7000] [send] via NET/IB/0
+gpub073:748597:748674 [0] NCCL INFO Channel 00/0 : 32[7000] -> 48[7000] [receive] via NET/IB/0
+gpub073:748597:748674 [0] NCCL INFO Channel 00/0 : 48[7000] -> 32[7000] [send] via 
NET/IB/0 +gpub073:748597:748674 [0] NCCL INFO Channel 00/0 : 56[7000] -> 48[7000] [receive] via NET/IB/0 +gpub073:748597:748674 [0] NCCL INFO Channel 01/0 : 52[7000] -> 48[7000] [receive] via NET/IB/0 +gpub073:748597:748674 [0] NCCL INFO Connected all trees +gpub073:748597:748674 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub073:748597:748674 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub073:748597:748674 [0] NCCL INFO comm 0xa03dfc0 rank 48 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpub050:2539552:2539552 [0] NCCL INFO cudaDriverVersion 12010 +gpub050:2539552:2539552 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.150<0> +gpub050:2539552:2539552 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub050:2539552:2539628 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.150<0> +gpub050:2539552:2539628 [0] NCCL INFO Using network IB +gpub050:2539552:2539628 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub050:2539552:2539628 [0] NCCL INFO Trees [0] 33/48/-1->32->0 [1] 33/-1/-1->32->36 +gpub050:2539552:2539628 [0] NCCL INFO Channel 00/0 : 31[c7000] -> 32[7000] [receive] via NET/IB/0 +gpub050:2539552:2539628 [0] NCCL INFO Channel 01/0 : 31[c7000] -> 32[7000] [receive] via NET/IB/0 +gpub050:2539552:2539628 [0] NCCL INFO Channel 00/0 : 32[7000] -> 33[46000] via P2P/IPC +gpub050:2539552:2539628 [0] NCCL INFO Channel 01/0 : 32[7000] -> 33[46000] via P2P/IPC +gpub050:2539552:2539628 [0] NCCL INFO Connected all rings +gpub050:2539552:2539628 [0] NCCL INFO Channel 01/0 : 32[7000] -> 36[7000] [send] via NET/IB/0 +gpub050:2539552:2539628 [0] NCCL INFO Channel 00/0 : 32[7000] -> 48[7000] [send] via NET/IB/0 +gpub050:2539552:2539628 [0] NCCL INFO Channel 00/0 : 0[7000] -> 32[7000] [receive] via NET/IB/0 +gpub050:2539552:2539628 [0] NCCL INFO Channel 00/0 : 32[7000] -> 0[7000] [send] via NET/IB/0 +gpub050:2539552:2539628 [0] NCCL INFO Channel 00/0 : 48[7000] -> 32[7000] [receive] via NET/IB/0 +gpub050:2539552:2539628 [0] NCCL INFO Channel 01/0 : 36[7000] -> 32[7000] [receive] via NET/IB/0 +gpub050:2539552:2539628 [0] NCCL INFO Connected all trees +gpub050:2539552:2539628 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub050:2539552:2539628 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub050:2539552:2539628 [0] NCCL INFO comm 0xaafdc050 rank 32 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpub052:2277063:2277063 [1] NCCL INFO cudaDriverVersion 12010 +gpub052:2277063:2277063 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.152<0> +gpub052:2277063:2277063 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub052:2277063:2277140 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.152<0> +gpub052:2277063:2277140 [1] NCCL INFO Using network IB +gpub052:2277063:2277140 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub052:2277063:2277140 [1] NCCL INFO Trees [0] 42/36/-1->41->40 [1] 42/-1/-1->41->40 +gpub052:2277063:2277140 [1] NCCL INFO Channel 00/0 : 41[46000] -> 42[85000] via P2P/IPC +gpub052:2277063:2277140 [1] NCCL INFO Channel 01/0 : 41[46000] -> 42[85000] via P2P/IPC +gpub052:2277063:2277140 [1] NCCL INFO Connected all rings +gpub052:2277063:2277140 [1] NCCL INFO Channel 00/0 : 36[7000] -> 41[46000] [receive] via NET/IB/0 +gpub052:2277063:2277140 [1] NCCL INFO Channel 00/0 : 41[46000] -> 36[7000] [send] via NET/IB/0 +gpub052:2277063:2277140 [1] NCCL INFO 
Channel 00/0 : 41[46000] -> 40[7000] via P2P/IPC +gpub052:2277063:2277140 [1] NCCL INFO Channel 01/0 : 41[46000] -> 40[7000] via P2P/IPC +gpub052:2277063:2277140 [1] NCCL INFO Connected all trees +gpub052:2277063:2277140 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub052:2277063:2277140 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub052:2277063:2277140 [1] NCCL INFO comm 0xa865590 rank 41 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpub008:2789794:2789794 [1] NCCL INFO cudaDriverVersion 12010 +gpub008:2789794:2789794 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.108<0> +gpub008:2789794:2789794 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub008:2789794:2789873 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.108<0> +gpub008:2789794:2789873 [1] NCCL INFO Using network IB +gpub008:2789794:2789873 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub008:2789794:2789873 [1] NCCL INFO Trees [0] 6/-1/-1->5->4 [1] 6/8/-1->5->4 +gpub008:2789794:2789873 [1] NCCL INFO Channel 00/0 : 5[46000] -> 6[85000] via P2P/IPC +gpub008:2789794:2789873 [1] NCCL INFO Channel 01/0 : 5[46000] -> 6[85000] via P2P/IPC +gpub008:2789794:2789873 [1] NCCL INFO Connected all rings +gpub008:2789794:2789873 [1] NCCL INFO Channel 01/0 : 5[46000] -> 8[7000] [send] via NET/IB/0 +gpub008:2789794:2789873 [1] NCCL INFO Channel 01/0 : 8[7000] -> 5[46000] [receive] via NET/IB/0 +gpub008:2789794:2789873 [1] NCCL INFO Channel 00/0 : 5[46000] -> 4[7000] via P2P/IPC +gpub008:2789794:2789873 [1] NCCL INFO Channel 01/0 : 5[46000] -> 4[7000] via P2P/IPC +gpub008:2789794:2789873 [1] NCCL INFO Connected all trees +gpub008:2789794:2789873 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub008:2789794:2789873 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub008:2789794:2789873 [1] NCCL INFO comm 0x8abbf8b0 rank 5 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpub002:2108201:2108201 [2] NCCL INFO cudaDriverVersion 12010 +gpub002:2108201:2108201 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.102<0> +gpub002:2108201:2108201 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub002:2108201:2108276 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.102<0> +gpub002:2108201:2108276 [2] NCCL INFO Using network IB +gpub002:2108201:2108276 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub002:2108201:2108276 [2] NCCL INFO Trees [0] 3/-1/-1->2->1 [1] 3/-1/-1->2->1 +gpub002:2108201:2108276 [2] NCCL INFO Channel 00/0 : 2[85000] -> 3[c7000] via P2P/IPC +gpub002:2108201:2108276 [2] NCCL INFO Channel 01/0 : 2[85000] -> 3[c7000] via P2P/IPC +gpub002:2108201:2108276 [2] NCCL INFO Connected all rings +gpub002:2108201:2108276 [2] NCCL INFO Channel 00/0 : 2[85000] -> 1[46000] via P2P/IPC +gpub002:2108201:2108276 [2] NCCL INFO Channel 01/0 : 2[85000] -> 1[46000] via P2P/IPC +gpub002:2108201:2108276 [2] NCCL INFO Connected all trees +gpub002:2108201:2108276 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub002:2108201:2108276 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub002:2108201:2108276 [2] NCCL INFO comm 0x8ca2cb90 rank 2 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpub051:3225330:3225330 [2] NCCL INFO cudaDriverVersion 12010 +gpub051:3225330:3225330 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.151<0> +gpub051:3225330:3225330 [2] NCCL INFO NET/Plugin : No plugin 
found (libnccl-net.so), using internal implementation +gpub051:3225330:3225408 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.151<0> +gpub051:3225330:3225408 [2] NCCL INFO Using network IB +gpub051:3225330:3225408 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub051:3225330:3225408 [2] NCCL INFO Trees [0] 39/-1/-1->38->37 [1] 39/-1/-1->38->37 +gpub051:3225330:3225408 [2] NCCL INFO Channel 00/0 : 38[85000] -> 39[c7000] via P2P/IPC +gpub051:3225330:3225408 [2] NCCL INFO Channel 01/0 : 38[85000] -> 39[c7000] via P2P/IPC +gpub051:3225330:3225408 [2] NCCL INFO Connected all rings +gpub051:3225330:3225408 [2] NCCL INFO Channel 00/0 : 38[85000] -> 37[46000] via P2P/IPC +gpub051:3225330:3225408 [2] NCCL INFO Channel 01/0 : 38[85000] -> 37[46000] via P2P/IPC +gpub051:3225330:3225408 [2] NCCL INFO Connected all trees +gpub051:3225330:3225408 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub051:3225330:3225408 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub051:3225330:3225408 [2] NCCL INFO comm 0x4f59a920 rank 38 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpub051:3225331:3225331 [3] NCCL INFO cudaDriverVersion 12010 +gpub051:3225331:3225331 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.151<0> +gpub051:3225331:3225331 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub051:3225331:3225406 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.151<0> +gpub051:3225331:3225406 [3] NCCL INFO Using network IB +gpub051:3225331:3225406 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub051:3225331:3225406 [3] NCCL INFO Trees [0] -1/-1/-1->39->38 [1] -1/-1/-1->39->38 +gpub051:3225331:3225406 [3] NCCL INFO Channel 00/0 : 39[c7000] -> 40[7000] [send] via NET/IB/0 +gpub051:3225331:3225406 [3] NCCL INFO Channel 01/0 : 39[c7000] -> 40[7000] [send] via NET/IB/0 +gpub051:3225331:3225406 [3] NCCL INFO Connected all rings +gpub051:3225331:3225406 [3] NCCL INFO Channel 00/0 : 39[c7000] -> 38[85000] via P2P/IPC +gpub051:3225331:3225406 [3] NCCL INFO Channel 01/0 : 39[c7000] -> 38[85000] via P2P/IPC +gpub051:3225331:3225406 [3] NCCL INFO Connected all trees +gpub051:3225331:3225406 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub051:3225331:3225406 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub051:3225331:3225406 [3] NCCL INFO comm 0xb371b610 rank 39 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpub052:2277065:2277065 [3] NCCL INFO cudaDriverVersion 12010 +gpub052:2277065:2277065 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.152<0> +gpub052:2277065:2277065 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub052:2277065:2277139 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.152<0> +gpub052:2277065:2277139 [3] NCCL INFO Using network IB +gpub052:2277065:2277139 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub052:2277065:2277139 [3] NCCL INFO Trees [0] -1/-1/-1->43->42 [1] -1/-1/-1->43->42 +gpub052:2277065:2277139 [3] NCCL INFO Channel 00/0 : 43[c7000] -> 44[7000] [send] via NET/IB/0 +gpub052:2277065:2277139 [3] NCCL INFO Channel 01/0 : 43[c7000] -> 44[7000] [send] via NET/IB/0 +gpub052:2277065:2277139 [3] NCCL INFO Connected all rings +gpub052:2277065:2277139 [3] NCCL INFO Channel 00/0 : 43[c7000] -> 42[85000] via P2P/IPC +gpub052:2277065:2277139 [3] NCCL INFO Channel 01/0 : 43[c7000] -> 42[85000] via P2P/IPC +gpub052:2277065:2277139 [3] NCCL INFO 
Connected all trees +gpub052:2277065:2277139 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub052:2277065:2277139 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub052:2277065:2277139 [3] NCCL INFO comm 0x8f38890 rank 43 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. 
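The per-rank NCCL INFO lines above are emitted while torch.distributed brings up the 64-rank NCCL communicator; this level of detail only appears when NCCL debug logging is enabled. A minimal sketch of a launch that produces this kind of output, assuming a generic environment (the init-method path and the WORLD_SIZE/RANK lookups are illustrative placeholders, not values taken from this job):

    import os
    import torch.distributed as dist

    os.environ.setdefault("NCCL_DEBUG", "INFO")  # emit the "NCCL INFO ..." setup lines

    # Rank wiring is illustrative; real launchers (srun, torchrun) provide it.
    dist.init_process_group(
        backend="nccl",
        init_method="file:///path/to/shared/.dist_init",  # placeholder shared file
        world_size=int(os.environ.get("WORLD_SIZE", "64")),
        rank=int(os.environ.get("RANK", "0")),
    )

Once every rank reports "Init COMPLETE", collective communication is ready: the P2P/IPC entries are intra-node GPU links, while the NET/IB entries are inter-node RoCE/InfiniBand links over mlx5_0.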
+[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator())
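The reducer warning above comes from PyTorch DDP: with find_unused_parameters=True, the reducer walks the autograd graph after every backward pass to find parameters that received no gradient, and here it never finds any, so each iteration pays for an unnecessary traversal. A minimal sketch of the change the warning recommends, assuming an already-initialized process group (the Linear module is a stand-in, not ESPnet's actual model wrapping):

    import torch
    from torch.nn.parallel import DistributedDataParallel as DDP

    # Assumes torch.distributed is already initialized, as in the launch above.
    model = torch.nn.Linear(8, 8).cuda()  # stand-in for the real S2T model
    ddp_model = DDP(
        model,
        device_ids=[torch.cuda.current_device()],
        find_unused_parameters=False,  # skip the per-iteration unused-parameter search
    )

As the warning itself notes, the flag should stay on if control flow in the model can leave some parameters unused in later iterations.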
+[gpub002:0/64] 2023-07-12 13:27:01,090 (trainer:732) INFO: 40epoch:train:1-100batch: iter_time=1.208, forward_time=0.235, loss_ctc=61.116, loss_att=44.050, acc=0.697, loss=49.170, backward_time=1.036, grad_norm=103.910, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.735e-05, train_time=8.676
+[gpub002:0/64] 2023-07-12 13:29:17,100 (trainer:732) INFO: 40epoch:train:101-200batch: iter_time=1.264e-04, forward_time=0.142, loss_ctc=72.135, loss_att=57.235, acc=0.700, loss=61.705, backward_time=1.027, grad_norm=107.394, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.734e-05, train_time=2.721
+[gpub002:0/64] 2023-07-12 13:31:32,346 (trainer:732) INFO: 40epoch:train:201-300batch: iter_time=1.291e-04, forward_time=0.142, loss_ctc=81.043, loss_att=57.325, acc=0.717, loss=64.440, backward_time=1.025, grad_norm=163.403, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.734e-05, train_time=2.705
+[gpub002:0/64] 2023-07-12 13:33:47,706 (trainer:732) INFO: 40epoch:train:301-400batch: iter_time=1.172e-04, forward_time=0.143, loss_ctc=71.829, loss_att=55.277, acc=0.697, loss=60.243, backward_time=1.026, grad_norm=110.550, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.733e-05, train_time=2.707
+[gpub002:0/64] 2023-07-12 13:36:02,885 (trainer:732) INFO: 40epoch:train:401-500batch: iter_time=1.285e-04, forward_time=0.143, loss_ctc=72.347, loss_att=53.215, acc=0.711, loss=58.954, backward_time=1.025, grad_norm=113.868, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.732e-05, train_time=2.703
+[gpub002:0/64] 2023-07-12 13:38:27,982 (trainer:732) INFO: 40epoch:train:501-600batch: iter_time=1.282e-04, forward_time=0.141, loss_ctc=68.400, loss_att=46.292, acc=0.689, loss=52.924, backward_time=1.032, grad_norm=117.473, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.731e-05, train_time=2.902
+[gpub002:0/64] 2023-07-12 13:40:47,434 (trainer:732) INFO: 40epoch:train:601-700batch: iter_time=1.231e-04, forward_time=0.142, loss_ctc=68.083, loss_att=50.252, acc=0.715, loss=55.601, backward_time=1.028, grad_norm=108.806, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.731e-05, train_time=2.789
+[gpub002:0/64] 2023-07-12 13:43:10,107 (trainer:732) INFO: 40epoch:train:701-800batch: iter_time=1.157e-04, forward_time=0.142, loss_ctc=68.676, loss_att=52.334, acc=0.698, loss=57.237, backward_time=1.024, grad_norm=108.949, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.730e-05, train_time=2.852
+[gpub002:0/64] 2023-07-12 13:44:08,310 (multiple_iter_factory:32) INFO: Building 1th iter-factory...
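Each trainer line reports the CTC branch loss, the attention branch loss, and a combined loss. The logged values are consistent with a hybrid CTC/attention objective, loss = w * loss_ctc + (1 - w) * loss_att with w = 0.3; the weight is inferred from the numbers themselves rather than stated anywhere in this log. A quick check against the first two entries above:

    # Assumption: ctc_weight = 0.3, inferred from the logged numbers.
    ctc_weight = 0.3
    for loss_ctc, loss_att, loss in [
        (61.116, 44.050, 49.170),  # 40epoch:train:1-100batch
        (72.135, 57.235, 61.705),  # 40epoch:train:101-200batch
    ]:
        combined = ctc_weight * loss_ctc + (1 - ctc_weight) * loss_att
        assert abs(combined - loss) < 1e-2, (combined, loss)

The loss_scale field and the later "The grad norm is nan. Skipping updating the model." warning are the usual signature of dynamic loss scaling under mixed precision: steps that overflow are skipped rather than applied, and the scale is adjusted over time (note the move from 3.245e+32 to 5.192e+32 and 6.490e+32 further down).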
+[gpub002:0/64] 2023-07-12 13:44:25,786 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub002:0/64] 2023-07-12 13:44:29,170 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"}
+ preprocess: )
+[gpub002:0/64] 2023-07-12 13:44:29,170 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9,
+[gpub002:0/64] 2023-07-12 13:44:29,176 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub002:0/64] 2023-07-12 13:49:29,871 (trainer:732) INFO: 40epoch:train:801-900batch: iter_time=2.128, forward_time=0.187, loss_ctc=62.142, loss_att=45.987, acc=0.699, loss=50.833, backward_time=1.043, grad_norm=127.878, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.729e-05, train_time=7.596
+[gpub002:0/64] 2023-07-12 13:51:45,925 (trainer:732) INFO: 40epoch:train:901-1000batch: iter_time=1.253e-04, forward_time=0.144, loss_ctc=73.026, loss_att=58.214, acc=0.710, loss=62.657, backward_time=1.025, grad_norm=124.501, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.728e-05, train_time=2.721
+[gpub002:0/64] 2023-07-12 13:54:01,587 (trainer:732) INFO: 40epoch:train:1001-1100batch: iter_time=1.277e-04, forward_time=0.143, loss_ctc=79.141, loss_att=57.052, acc=0.725, loss=63.679, backward_time=1.025, grad_norm=151.133, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.727e-05, train_time=2.713
+[gpub002:0/64] 2023-07-12 13:56:17,421 (trainer:732) INFO: 40epoch:train:1101-1200batch: iter_time=1.301e-04, forward_time=0.143, loss_ctc=71.202, loss_att=55.122, acc=0.705, loss=59.946, backward_time=1.026, grad_norm=97.471, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.727e-05, train_time=2.716
+[gpub002:0/64] 2023-07-12 13:58:33,114 (trainer:732) INFO: 40epoch:train:1201-1300batch: iter_time=1.240e-04, forward_time=0.143, loss_ctc=71.459, loss_att=52.133, acc=0.724, loss=57.931, backward_time=1.025, grad_norm=140.565, clip=100.000, loss_scale=5.192e+32, optim_step_time=0.179, optim0_lr0=5.726e-05, train_time=2.714
+[gpub002:0/64] 2023-07-12 14:00:48,389 (trainer:732) INFO: 40epoch:train:1301-1400batch: iter_time=1.241e-04, forward_time=0.143, loss_ctc=65.305, loss_att=45.866, acc=0.695, loss=51.698, backward_time=1.021, grad_norm=108.278, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.179, optim0_lr0=5.725e-05, train_time=2.705
+[gpub002:0/64] 2023-07-12 14:03:05,877 (trainer:732) INFO: 40epoch:train:1401-1500batch: iter_time=1.239e-04, forward_time=0.142, loss_ctc=68.643, loss_att=50.860, acc=0.714, loss=56.195, backward_time=1.025, grad_norm=114.462, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.179, optim0_lr0=5.724e-05, train_time=2.750
+[gpub002:0/64] 2023-07-12 14:05:24,000 (trainer:732) INFO: 40epoch:train:1501-1600batch: iter_time=1.178e-04, forward_time=0.143, loss_ctc=67.303, loss_att=51.300, acc=0.712, loss=56.101, backward_time=1.026, grad_norm=99.620, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.179, optim0_lr0=5.724e-05, train_time=2.762
+[gpub002:0/64] 2023-07-12 14:06:37,008 (trainer:663) WARNING: The grad norm is nan. Skipping updating the model.
+[gpub002:0/64] 2023-07-12 14:06:56,326 (multiple_iter_factory:32) INFO: Building 2th iter-factory...
+[gpub002:0/64] 2023-07-12 14:07:14,096 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub002:0/64] 2023-07-12 14:07:17,513 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"}
+ preprocess: )
+[gpub002:0/64] 2023-07-12 14:07:17,513 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11,
+[gpub002:0/64] 2023-07-12 14:07:17,519 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub002:0/64] 2023-07-12 14:12:18,848 (trainer:732) INFO: 40epoch:train:1601-1700batch: iter_time=1.255, forward_time=0.143, loss_ctc=66.208, loss_att=50.001, acc=0.701, loss=54.863, backward_time=1.030, grad_norm=128.956, clip=100.000, loss_scale=4.967e+32, optim_step_time=0.179, optim0_lr0=5.723e-05, train_time=8.297
+[gpub002:0/64] 2023-07-12 14:14:35,280 (trainer:732) INFO: 40epoch:train:1701-1800batch: iter_time=1.084e-04, forward_time=0.143, loss_ctc=67.632, loss_att=51.671, acc=0.709, loss=56.460, backward_time=1.028, grad_norm=131.230, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.722e-05, train_time=2.728
+[gpub002:0/64] 2023-07-12 14:16:51,826 (trainer:732) INFO: 40epoch:train:1801-1900batch: iter_time=1.260e-04, forward_time=0.145, loss_ctc=77.856, loss_att=57.061, acc=0.718, loss=63.299, backward_time=1.031, grad_norm=127.495, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.722e-05, train_time=2.731
+[gpub002:0/64] 2023-07-12 14:19:07,735 (trainer:732) INFO: 40epoch:train:1901-2000batch: iter_time=1.083e-04, forward_time=0.143, loss_ctc=74.187, loss_att=54.012, acc=0.723, loss=60.065, backward_time=1.029, grad_norm=134.227, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.721e-05, train_time=2.718
+[gpub002:0/64] 2023-07-12 14:21:23,408 (trainer:732) INFO: 40epoch:train:2001-2100batch: iter_time=1.295e-04, forward_time=0.143, loss_ctc=71.655, loss_att=56.017, acc=0.709, loss=60.708, backward_time=1.025, grad_norm=113.356, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.720e-05, train_time=2.713
+[gpub002:0/64] 2023-07-12 14:23:38,786 (trainer:732) INFO: 40epoch:train:2101-2200batch: iter_time=1.211e-04, forward_time=0.143, loss_ctc=68.732, loss_att=48.850, acc=0.703, loss=54.815, backward_time=1.022, grad_norm=114.122, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.719e-05, train_time=2.707
+[gpub002:0/64] 2023-07-12 14:25:54,333 (trainer:732) INFO: 40epoch:train:2201-2300batch: iter_time=1.151e-04, forward_time=0.142, loss_ctc=63.319, loss_att=45.815, acc=0.718, loss=51.066, backward_time=1.024, grad_norm=112.742, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.719e-05, train_time=2.711
+[gpub002:0/64] 2023-07-12 14:28:11,996 (trainer:732) INFO: 40epoch:train:2301-2400batch: iter_time=1.254e-04, forward_time=0.143, loss_ctc=66.195, loss_att=50.742, acc=0.715, loss=55.378, backward_time=1.023, grad_norm=102.963, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.718e-05, train_time=2.753
+[gpub002:0/64] 2023-07-12 14:30:30,321 (trainer:732) INFO: 40epoch:train:2401-2500batch: iter_time=1.265e-04, forward_time=0.143, loss_ctc=71.093, loss_att=53.758, acc=0.706, loss=58.959, backward_time=1.027, grad_norm=115.668, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.717e-05, train_time=2.766
+[gpub002:0/64] 2023-07-12 14:30:32,911 (multiple_iter_factory:32) INFO: Building 3th iter-factory...
+[gpub002:0/64] 2023-07-12 14:30:50,993 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub002:0/64] 2023-07-12 14:30:54,403 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"}
+ preprocess: )
+[gpub002:0/64] 2023-07-12 14:30:54,403 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10,
+[gpub002:0/64] 2023-07-12 14:30:54,409 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub002:0/64] 2023-07-12 14:37:48,647 (trainer:732) INFO: 40epoch:train:2501-2600batch: iter_time=1.257, forward_time=0.143, loss_ctc=61.405, loss_att=46.625, acc=0.694, loss=51.059, backward_time=1.033, grad_norm=109.781, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.716e-05, train_time=8.766
+[gpub002:0/64] 2023-07-12 14:40:05,295 (trainer:732) INFO: 40epoch:train:2601-2700batch: iter_time=1.359e-04, forward_time=0.144, loss_ctc=69.964, loss_att=53.049, acc=0.717, loss=58.124, backward_time=1.027, grad_norm=135.636, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.716e-05, train_time=2.733
+[gpub002:0/64] 2023-07-12 14:42:21,035 (trainer:732) INFO: 40epoch:train:2701-2800batch: iter_time=1.229e-04, forward_time=0.144, loss_ctc=77.512, loss_att=56.118, acc=0.709, loss=62.537, backward_time=1.025, grad_norm=137.980, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.715e-05, train_time=2.715
+[gpub002:0/64] 2023-07-12 14:44:36,479 (trainer:732) INFO: 40epoch:train:2801-2900batch: iter_time=1.048e-04, forward_time=0.142, loss_ctc=71.235, loss_att=56.169, acc=0.706, loss=60.689, backward_time=1.023, grad_norm=120.061, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.714e-05, train_time=2.709
+[gpub002:0/64] 2023-07-12 14:46:51,759 (trainer:732) INFO: 40epoch:train:2901-3000batch: iter_time=1.043e-04, forward_time=0.143, loss_ctc=68.448, loss_att=51.921, acc=0.704, loss=56.879, backward_time=1.022, grad_norm=104.401, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.713e-05, train_time=2.705
+[gpub002:0/64] 2023-07-12 14:49:07,671 (trainer:732) INFO: 40epoch:train:3001-3100batch: iter_time=1.139e-04, forward_time=0.142, loss_ctc=66.825, loss_att=44.975, acc=0.706, loss=51.530, backward_time=1.021, grad_norm=103.993, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.713e-05, train_time=2.718
+[gpub002:0/64] 2023-07-12 14:51:28,934 (trainer:732) INFO: 40epoch:train:3101-3200batch: iter_time=1.132e-04, forward_time=0.142, loss_ctc=65.918, loss_att=48.901, acc=0.716, loss=54.006, backward_time=1.027, grad_norm=102.075, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.712e-05, train_time=2.825
+[gpub002:0/64] 2023-07-12 14:53:49,088 (trainer:732) INFO: 40epoch:train:3201-3300batch: iter_time=1.065e-04, forward_time=0.142, loss_ctc=69.493, loss_att=54.998, acc=0.698, loss=59.346, backward_time=1.045, grad_norm=117.513, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.711e-05, train_time=2.803
+[gpub002:0/64] 2023-07-12 14:54:39,670 (multiple_iter_factory:32) INFO: Building 4th iter-factory...
+[gpub002:0/64] 2023-07-12 14:54:57,543 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub002:0/64] 2023-07-12 14:55:01,100 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"}
+ preprocess: )
+[gpub002:0/64] 2023-07-12 14:55:01,100 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5,
+[gpub002:0/64] 2023-07-12 14:55:01,106 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub002:0/64] 2023-07-12 15:00:02,934 (trainer:732) INFO: 40epoch:train:3301-3400batch: iter_time=1.259, forward_time=0.143, loss_ctc=63.620, loss_att=45.338, acc=0.714, loss=50.822, backward_time=1.049, grad_norm=109.621, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.710e-05, train_time=7.477
+[gpub002:0/64] 2023-07-12 15:02:20,429 (trainer:732) INFO: 40epoch:train:3401-3500batch: iter_time=1.191e-04, forward_time=0.143, loss_ctc=66.528, loss_att=49.949, acc=0.714, loss=54.922, backward_time=1.026, grad_norm=125.062, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.710e-05, train_time=2.750
+[gpub002:0/64] 2023-07-12 15:04:36,303 (trainer:732) INFO: 40epoch:train:3501-3600batch: iter_time=1.151e-04, forward_time=0.144, loss_ctc=76.092, loss_att=55.412, acc=0.724, loss=61.616, backward_time=1.026, grad_norm=122.114, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.709e-05, train_time=2.717
+[gpub002:0/64] 2023-07-12 15:06:52,169 (trainer:732) INFO: 40epoch:train:3601-3700batch: iter_time=1.145e-04, forward_time=0.144, loss_ctc=72.680, loss_att=53.035, acc=0.724, loss=58.929, backward_time=1.025, grad_norm=97.254, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.708e-05, train_time=2.717
+[gpub002:0/64] 2023-07-12 15:09:07,796 (trainer:732) INFO: 40epoch:train:3701-3800batch: iter_time=1.156e-04, forward_time=0.143, loss_ctc=69.121, loss_att=53.328, acc=0.720, loss=58.066, backward_time=1.024, grad_norm=139.481, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.707e-05, train_time=2.712
+[gpub002:0/64] 2023-07-12 15:11:23,110 (trainer:732) INFO: 40epoch:train:3801-3900batch: iter_time=1.205e-04,
forward_time=0.143, loss_ctc=69.056, loss_att=47.903, acc=0.705, loss=54.249, backward_time=1.021, grad_norm=121.204, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.707e-05, train_time=2.706 +[gpub002:0/64] 2023-07-12 15:13:40,798 (trainer:732) INFO: 40epoch:train:3901-4000batch: iter_time=1.128e-04, forward_time=0.143, loss_ctc=65.887, loss_att=48.895, acc=0.717, loss=53.992, backward_time=1.027, grad_norm=104.050, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.706e-05, train_time=2.754 +[gpub002:0/64] 2023-07-12 15:16:02,209 (trainer:732) INFO: 40epoch:train:4001-4100batch: iter_time=1.186e-04, forward_time=0.143, loss_ctc=66.041, loss_att=51.439, acc=0.707, loss=55.819, backward_time=1.028, grad_norm=112.561, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.705e-05, train_time=2.828 +[gpub002:0/64] 2023-07-12 15:17:39,601 (multiple_iter_factory:32) INFO: Building 5th iter-factory... +[gpub002:0/64] 2023-07-12 15:17:57,438 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-12 15:18:00,971 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-12 15:18:00,971 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3, +[gpub002:0/64] 2023-07-12 15:18:00,978 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-12 15:21:47,365 (trainer:732) INFO: 40epoch:train:4101-4200batch: iter_time=1.250, forward_time=0.144, loss_ctc=68.950, loss_att=50.746, acc=0.718, loss=56.207, backward_time=1.041, grad_norm=106.172, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.704e-05, train_time=6.903 +[gpub002:0/64] 2023-07-12 15:24:03,354 (trainer:732) INFO: 40epoch:train:4201-4300batch: iter_time=1.186e-04, forward_time=0.144, loss_ctc=62.707, loss_att=48.487, acc=0.702, loss=52.753, backward_time=1.026, grad_norm=100.763, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.704e-05, train_time=2.720 +[gpub002:0/64] 2023-07-12 15:26:19,013 (trainer:732) INFO: 40epoch:train:4301-4400batch: iter_time=1.151e-04, forward_time=0.142, loss_ctc=69.760, loss_att=52.370, acc=0.725, loss=57.587, backward_time=1.023, grad_norm=106.670, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.703e-05, train_time=2.713 +[gpub002:0/64] 2023-07-12 15:28:34,645 (trainer:732) INFO: 40epoch:train:4401-4500batch: iter_time=1.100e-04, forward_time=0.143, loss_ctc=78.815, loss_att=57.161, acc=0.718, loss=63.657, backward_time=1.024, grad_norm=126.050, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.702e-05, train_time=2.712 +[gpub002:0/64] 2023-07-12 15:30:50,128 (trainer:732) INFO: 40epoch:train:4501-4600batch: iter_time=1.185e-04, forward_time=0.143, loss_ctc=67.843, loss_att=54.104, acc=0.716, loss=58.226, backward_time=1.023, grad_norm=113.696, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.701e-05, 
train_time=2.709 +[gpub002:0/64] 2023-07-12 15:33:05,671 (trainer:732) INFO: 40epoch:train:4601-4700batch: iter_time=1.154e-04, forward_time=0.143, loss_ctc=70.374, loss_att=50.230, acc=0.723, loss=56.273, backward_time=1.023, grad_norm=118.649, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.701e-05, train_time=2.711 +[gpub002:0/64] 2023-07-12 15:35:20,996 (trainer:732) INFO: 40epoch:train:4701-4800batch: iter_time=1.161e-04, forward_time=0.143, loss_ctc=64.594, loss_att=44.965, acc=0.707, loss=50.854, backward_time=1.022, grad_norm=106.973, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.700e-05, train_time=2.706 +[gpub002:0/64] 2023-07-12 15:37:36,540 (trainer:732) INFO: 40epoch:train:4801-4900batch: iter_time=1.091e-04, forward_time=0.143, loss_ctc=67.322, loss_att=50.435, acc=0.717, loss=55.501, backward_time=1.024, grad_norm=103.806, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.699e-05, train_time=2.711 +[gpub002:0/64] 2023-07-12 15:39:52,011 (trainer:732) INFO: 40epoch:train:4901-5000batch: iter_time=1.164e-04, forward_time=0.143, loss_ctc=70.025, loss_att=54.199, acc=0.712, loss=58.947, backward_time=1.023, grad_norm=113.323, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.698e-05, train_time=2.709 +[gpub002:0/64] 2023-07-12 15:39:54,676 (multiple_iter_factory:32) INFO: Building 6th iter-factory... +[gpub002:0/64] 2023-07-12 15:40:13,004 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-12 15:40:16,430 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-12 15:40:16,430 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8, +[gpub002:0/64] 2023-07-12 15:40:16,437 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-12 15:45:20,743 (trainer:732) INFO: 40epoch:train:5001-5100batch: iter_time=1.263, forward_time=0.180, loss_ctc=60.084, loss_att=45.552, acc=0.702, loss=49.912, backward_time=1.033, grad_norm=102.864, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.698e-05, train_time=6.574 +[gpub002:0/64] 2023-07-12 15:47:37,331 (trainer:732) INFO: 40epoch:train:5101-5200batch: iter_time=1.241e-04, forward_time=0.144, loss_ctc=70.511, loss_att=52.939, acc=0.720, loss=58.210, backward_time=1.024, grad_norm=103.107, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.697e-05, train_time=2.732 +[gpub002:0/64] 2023-07-12 15:49:58,905 (trainer:732) INFO: 40epoch:train:5201-5300batch: iter_time=1.112e-04, forward_time=0.142, loss_ctc=77.354, loss_att=56.014, acc=0.710, loss=62.416, backward_time=1.023, grad_norm=133.871, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.696e-05, train_time=2.831 +[gpub002:0/64] 2023-07-12 15:52:14,432 (trainer:732) INFO: 40epoch:train:5301-5400batch: iter_time=1.243e-04, forward_time=0.143, loss_ctc=68.506, loss_att=54.652, acc=0.711, 
loss=58.808, backward_time=1.025, grad_norm=114.787, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.695e-05, train_time=2.710 +[gpub002:0/64] 2023-07-12 15:54:29,792 (trainer:732) INFO: 40epoch:train:5401-5500batch: iter_time=1.084e-04, forward_time=0.143, loss_ctc=68.380, loss_att=50.824, acc=0.707, loss=56.091, backward_time=1.023, grad_norm=186.761, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.695e-05, train_time=2.707 +[gpub002:0/64] 2023-07-12 15:56:44,731 (trainer:732) INFO: 40epoch:train:5501-5600batch: iter_time=1.176e-04, forward_time=0.142, loss_ctc=65.993, loss_att=45.437, acc=0.706, loss=51.604, backward_time=1.019, grad_norm=104.703, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.694e-05, train_time=2.699 +[gpub002:0/64] 2023-07-12 15:58:59,893 (trainer:732) INFO: 40epoch:train:5601-5700batch: iter_time=1.276e-04, forward_time=0.143, loss_ctc=65.646, loss_att=48.820, acc=0.718, loss=53.868, backward_time=1.022, grad_norm=121.882, clip=100.000, loss_scale=4.738e+32, optim_step_time=0.180, optim0_lr0=5.693e-05, train_time=2.703 +[gpub002:0/64] 2023-07-12 16:01:15,393 (trainer:732) INFO: 40epoch:train:5701-5800batch: iter_time=1.205e-04, forward_time=0.143, loss_ctc=69.173, loss_att=55.052, acc=0.701, loss=59.288, backward_time=1.025, grad_norm=113.446, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.180, optim0_lr0=5.693e-05, train_time=2.710 +[gpub002:0/64] 2023-07-12 16:02:13,718 (multiple_iter_factory:32) INFO: Building 7th iter-factory... +[gpub002:0/64] 2023-07-12 16:02:31,777 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-12 16:02:35,214 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-12 16:02:35,215 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6, +[gpub002:0/64] 2023-07-12 16:02:35,223 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-12 16:06:51,419 (trainer:732) INFO: 40epoch:train:5801-5900batch: iter_time=1.911, forward_time=0.144, loss_ctc=61.992, loss_att=46.321, acc=0.712, loss=51.022, backward_time=1.033, grad_norm=119.027, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.180, optim0_lr0=5.692e-05, train_time=6.720 +[gpub002:0/64] 2023-07-12 16:09:07,218 (trainer:732) INFO: 40epoch:train:5901-6000batch: iter_time=1.335e-04, forward_time=0.143, loss_ctc=66.798, loss_att=49.972, acc=0.708, loss=55.019, backward_time=1.023, grad_norm=108.107, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.179, optim0_lr0=5.691e-05, train_time=2.716 +[gpub002:0/64] 2023-07-12 16:11:22,903 (trainer:732) INFO: 40epoch:train:6001-6100batch: iter_time=1.237e-04, forward_time=0.143, loss_ctc=76.895, loss_att=56.337, acc=0.716, loss=62.504, backward_time=1.024, grad_norm=192.818, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.179, optim0_lr0=5.690e-05, train_time=2.713 +[gpub002:0/64] 2023-07-12 16:13:38,302 
(trainer:732) INFO: 40epoch:train:6101-6200batch: iter_time=1.439e-04, forward_time=0.144, loss_ctc=73.992, loss_att=52.793, acc=0.718, loss=59.152, backward_time=1.022, grad_norm=97.949, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.179, optim0_lr0=5.690e-05, train_time=2.708 +[gpub002:0/64] 2023-07-12 16:15:53,887 (trainer:732) INFO: 40epoch:train:6201-6300batch: iter_time=1.375e-04, forward_time=0.144, loss_ctc=69.085, loss_att=53.781, acc=0.709, loss=58.372, backward_time=1.024, grad_norm=107.327, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.179, optim0_lr0=5.689e-05, train_time=2.711 +[gpub002:0/64] 2023-07-12 16:18:09,295 (trainer:732) INFO: 40epoch:train:6301-6400batch: iter_time=1.276e-04, forward_time=0.145, loss_ctc=67.794, loss_att=47.222, acc=0.705, loss=53.394, backward_time=1.023, grad_norm=105.215, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.179, optim0_lr0=5.688e-05, train_time=2.708 +[gpub002:0/64] 2023-07-12 16:20:24,498 (trainer:732) INFO: 40epoch:train:6401-6500batch: iter_time=1.279e-04, forward_time=0.145, loss_ctc=65.690, loss_att=47.722, acc=0.718, loss=53.112, backward_time=1.023, grad_norm=125.378, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.180, optim0_lr0=5.687e-05, train_time=2.704 +[gpub002:0/64] 2023-07-12 16:22:12,380 (trainer:663) WARNING: The grad norm is nan. Skipping updating the model. +[gpub002:0/64] 2023-07-12 16:22:39,598 (trainer:732) INFO: 40epoch:train:6501-6600batch: iter_time=1.096e-04, forward_time=0.144, loss_ctc=64.236, loss_att=49.951, acc=0.705, loss=54.237, backward_time=1.023, grad_norm=93.202, clip=100.000, loss_scale=5.828e+32, optim_step_time=0.180, optim0_lr0=5.687e-05, train_time=2.702 +[gpub002:0/64] 2023-07-12 16:24:14,970 (multiple_iter_factory:32) INFO: Building 8th iter-factory... 
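Note: the constant loss_scale fields above and the two "The grad norm is nan. Skipping updating the model." warnings in this epoch are the signature of dynamic loss scaling under mixed-precision training: on an overflow the optimizer step is skipped and the scale is backed off by half, which matches the logged drop from 6.490e+32 to 3.245e+32 after the first warning (the in-between 4.967e+32 and 5.828e+32 readings are 100-batch window averages). A minimal sketch of that mechanism with torch.cuda.amp follows; it illustrates the pattern, not ESPnet's actual trainer code, and train_step and max_norm=100.0 are assumptions (the clip=100.000 field suggests but does not confirm the threshold).

import torch

scaler = torch.cuda.amp.GradScaler()  # grows the scale on success, halves it on overflow

def train_step(model, batch, optimizer):
    optimizer.zero_grad()
    with torch.cuda.amp.autocast():
        loss = model(**batch)                    # forward pass in reduced precision
    scaler.scale(loss).backward()                # backward on the scaled loss
    scaler.unscale_(optimizer)                   # restore true gradient magnitudes
    grad_norm = torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=100.0)
    if not torch.isfinite(grad_norm):
        # mirrors the WARNING above: the step below becomes a no-op and
        # GradScaler halves its scale when it sees the non-finite grads
        print("The grad norm is nan. Skipping updating the model.")
    scaler.step(optimizer)                       # silently skipped if grads are inf/nan
    scaler.update()                              # adjusts the dynamic loss scale
    return loss.detach(), grad_norm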
+[gpub002:0/64] 2023-07-12 16:24:32,839 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-12 16:24:36,515 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-12 16:24:36,515 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1, +[gpub002:0/64] 2023-07-12 16:24:36,521 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-12 16:28:25,470 (trainer:732) INFO: 40epoch:train:6601-6700batch: iter_time=1.255, forward_time=0.145, loss_ctc=68.337, loss_att=49.507, acc=0.724, loss=55.156, backward_time=1.038, grad_norm=116.207, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.686e-05, train_time=6.917 +[gpub002:0/64] 2023-07-12 16:30:42,084 (trainer:732) INFO: 40epoch:train:6701-6800batch: iter_time=1.329e-04, forward_time=0.145, loss_ctc=62.314, loss_att=47.201, acc=0.706, loss=51.735, backward_time=1.025, grad_norm=98.754, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.685e-05, train_time=2.732 +[gpub002:0/64] 2023-07-12 16:32:57,908 (trainer:732) INFO: 40epoch:train:6801-6900batch: iter_time=1.346e-04, forward_time=0.145, loss_ctc=68.481, loss_att=52.287, acc=0.727, loss=57.145, backward_time=1.026, grad_norm=109.277, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.684e-05, train_time=2.716 +[gpub002:0/64] 2023-07-12 16:35:13,486 (trainer:732) INFO: 40epoch:train:6901-7000batch: iter_time=1.281e-04, forward_time=0.144, loss_ctc=79.843, loss_att=57.285, acc=0.717, loss=64.053, backward_time=1.026, grad_norm=130.573, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.684e-05, train_time=2.711 +[gpub002:0/64] 2023-07-12 16:37:29,332 (trainer:732) INFO: 40epoch:train:7001-7100batch: iter_time=1.175e-04, forward_time=0.145, loss_ctc=67.417, loss_att=53.169, acc=0.721, loss=57.444, backward_time=1.027, grad_norm=147.307, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.683e-05, train_time=2.717 +[gpub002:0/64] 2023-07-12 16:39:44,891 (trainer:732) INFO: 40epoch:train:7101-7200batch: iter_time=1.272e-04, forward_time=0.145, loss_ctc=70.180, loss_att=50.528, acc=0.727, loss=56.424, backward_time=1.024, grad_norm=107.531, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.682e-05, train_time=2.711 +[gpub002:0/64] 2023-07-12 16:42:05,273 (trainer:732) INFO: 40epoch:train:7201-7300batch: iter_time=1.098e-04, forward_time=0.144, loss_ctc=63.893, loss_att=44.742, acc=0.708, loss=50.488, backward_time=1.023, grad_norm=103.497, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.681e-05, train_time=2.807 +[gpub002:0/64] 2023-07-12 16:44:20,746 (trainer:732) INFO: 40epoch:train:7301-7400batch: iter_time=1.136e-04, forward_time=0.143, loss_ctc=67.728, loss_att=50.827, acc=0.719, loss=55.897, backward_time=1.024, grad_norm=118.243, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, 
optim0_lr0=5.681e-05, train_time=2.709 +[gpub002:0/64] 2023-07-12 16:46:36,313 (trainer:732) INFO: 40epoch:train:7401-7500batch: iter_time=1.123e-04, forward_time=0.143, loss_ctc=69.549, loss_att=54.017, acc=0.715, loss=58.676, backward_time=1.024, grad_norm=105.138, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.680e-05, train_time=2.711 +[gpub002:0/64] 2023-07-12 16:46:39,103 (multiple_iter_factory:32) INFO: Building 9th iter-factory... +[gpub002:0/64] 2023-07-12 16:46:57,326 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-12 16:47:00,716 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-12 16:47:00,716 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2, +[gpub002:0/64] 2023-07-12 16:47:00,722 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-12 16:52:43,273 (trainer:732) INFO: 40epoch:train:7501-7600batch: iter_time=1.295, forward_time=0.144, loss_ctc=58.813, loss_att=42.145, acc=0.710, loss=47.145, backward_time=1.037, grad_norm=124.111, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.679e-05, train_time=7.339 +[gpub002:0/64] 2023-07-12 16:54:59,882 (trainer:732) INFO: 40epoch:train:7601-7700batch: iter_time=1.243e-04, forward_time=0.144, loss_ctc=68.732, loss_att=53.937, acc=0.713, loss=58.375, backward_time=1.029, grad_norm=130.599, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.679e-05, train_time=2.732 +[gpub002:0/64] 2023-07-12 16:57:15,353 (trainer:732) INFO: 40epoch:train:7701-7800batch: iter_time=1.224e-04, forward_time=0.145, loss_ctc=77.851, loss_att=55.826, acc=0.727, loss=62.433, backward_time=1.025, grad_norm=121.431, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.678e-05, train_time=2.709 +[gpub002:0/64] 2023-07-12 16:59:30,902 (trainer:732) INFO: 40epoch:train:7801-7900batch: iter_time=1.444e-04, forward_time=0.144, loss_ctc=70.346, loss_att=53.701, acc=0.708, loss=58.695, backward_time=1.026, grad_norm=112.362, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.677e-05, train_time=2.711 +[gpub002:0/64] 2023-07-12 17:01:46,962 (trainer:732) INFO: 40epoch:train:7901-8000batch: iter_time=1.644e-04, forward_time=0.145, loss_ctc=69.909, loss_att=51.923, acc=0.720, loss=57.319, backward_time=1.024, grad_norm=115.655, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.676e-05, train_time=2.721 +[gpub002:0/64] 2023-07-12 17:04:02,181 (trainer:732) INFO: 40epoch:train:8001-8100batch: iter_time=1.097e-04, forward_time=0.143, loss_ctc=64.797, loss_att=45.585, acc=0.695, loss=51.348, backward_time=1.021, grad_norm=96.860, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.676e-05, train_time=2.704 +[gpub002:0/64] 2023-07-12 17:06:18,006 (trainer:732) INFO: 40epoch:train:8101-8200batch: iter_time=1.347e-04, forward_time=0.144, loss_ctc=65.946, loss_att=48.537, 
acc=0.721, loss=53.759, backward_time=1.025, grad_norm=103.143, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.675e-05, train_time=2.716 +[gpub002:0/64] 2023-07-12 17:08:43,714 (trainer:732) INFO: 40epoch:train:8201-8300batch: iter_time=1.585e-04, forward_time=0.144, loss_ctc=66.556, loss_att=50.587, acc=0.709, loss=55.378, backward_time=1.033, grad_norm=104.722, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.674e-05, train_time=2.914 +[gpub002:0/64] 2023-07-12 17:09:31,672 (multiple_iter_factory:32) INFO: Building 10th iter-factory... +[gpub002:0/64] 2023-07-12 17:09:49,815 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-12 17:09:53,285 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-12 17:09:53,285 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0, +[gpub002:0/64] 2023-07-12 17:09:53,291 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-12 17:16:05,942 (trainer:732) INFO: 40epoch:train:8301-8400batch: iter_time=1.764, forward_time=0.145, loss_ctc=61.574, loss_att=47.462, acc=0.713, loss=51.695, backward_time=1.040, grad_norm=100.876, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.673e-05, train_time=8.844 +[gpub002:0/64] 2023-07-12 17:18:22,844 (trainer:732) INFO: 40epoch:train:8401-8500batch: iter_time=1.222e-04, forward_time=0.144, loss_ctc=66.028, loss_att=49.519, acc=0.712, loss=54.472, backward_time=1.025, grad_norm=100.831, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.673e-05, train_time=2.738 +[gpub002:0/64] 2023-07-12 17:20:39,127 (trainer:732) INFO: 40epoch:train:8501-8600batch: iter_time=1.118e-04, forward_time=0.144, loss_ctc=75.776, loss_att=55.115, acc=0.722, loss=61.313, backward_time=1.028, grad_norm=133.650, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.672e-05, train_time=2.725 +[gpub002:0/64] 2023-07-12 17:22:54,520 (trainer:732) INFO: 40epoch:train:8601-8700batch: iter_time=1.148e-04, forward_time=0.143, loss_ctc=71.264, loss_att=51.115, acc=0.722, loss=57.159, backward_time=1.024, grad_norm=108.773, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.671e-05, train_time=2.708 +[gpub002:0/64] 2023-07-12 17:25:10,311 (trainer:732) INFO: 40epoch:train:8701-8800batch: iter_time=1.039e-04, forward_time=0.144, loss_ctc=70.093, loss_att=53.908, acc=0.711, loss=58.763, backward_time=1.027, grad_norm=115.711, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.671e-05, train_time=2.716 +[gpub002:0/64] 2023-07-12 17:27:25,641 (trainer:732) INFO: 40epoch:train:8801-8900batch: iter_time=1.147e-04, forward_time=0.143, loss_ctc=65.537, loss_att=45.778, acc=0.707, loss=51.706, backward_time=1.025, grad_norm=109.775, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.670e-05, train_time=2.706 +[gpub002:0/64] 2023-07-12 17:29:40,885 
(trainer:732) INFO: 40epoch:train:8901-9000batch: iter_time=1.186e-04, forward_time=0.143, loss_ctc=64.933, loss_att=47.129, acc=0.721, loss=52.470, backward_time=1.024, grad_norm=108.344, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.669e-05, train_time=2.705 +[gpub002:0/64] 2023-07-12 17:31:56,246 (trainer:732) INFO: 40epoch:train:9001-9100batch: iter_time=1.121e-04, forward_time=0.143, loss_ctc=64.752, loss_att=50.086, acc=0.708, loss=54.486, backward_time=1.024, grad_norm=114.341, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.668e-05, train_time=2.707 +[gpub002:0/64] 2023-07-12 17:33:29,921 (multiple_iter_factory:32) INFO: Building 11th iter-factory... +[gpub002:0/64] 2023-07-12 17:33:48,221 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-12 17:33:51,742 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-12 17:33:51,743 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7, +[gpub002:0/64] 2023-07-12 17:33:51,749 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-12 17:38:46,300 (trainer:732) INFO: 40epoch:train:9101-9200batch: iter_time=1.947, forward_time=0.180, loss_ctc=68.492, loss_att=49.647, acc=0.722, loss=55.300, backward_time=1.038, grad_norm=117.017, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.668e-05, train_time=8.201 +[gpub002:0/64] 2023-07-12 17:41:02,935 (trainer:732) INFO: 40epoch:train:9201-9300batch: iter_time=1.099e-04, forward_time=0.145, loss_ctc=61.780, loss_att=48.408, acc=0.705, loss=52.420, backward_time=1.025, grad_norm=130.871, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.667e-05, train_time=2.733 +[gpub002:0/64] 2023-07-12 17:43:20,427 (trainer:732) INFO: 40epoch:train:9301-9400batch: iter_time=1.202e-04, forward_time=0.144, loss_ctc=68.219, loss_att=52.508, acc=0.732, loss=57.222, backward_time=1.027, grad_norm=114.930, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.666e-05, train_time=2.750 +[gpub002:0/64] 2023-07-12 17:45:36,344 (trainer:732) INFO: 40epoch:train:9401-9500batch: iter_time=9.479e-05, forward_time=0.142, loss_ctc=78.737, loss_att=56.410, acc=0.723, loss=63.108, backward_time=1.025, grad_norm=120.963, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.665e-05, train_time=2.718 +[gpub002:0/64] 2023-07-12 17:47:51,855 (trainer:732) INFO: 40epoch:train:9501-9600batch: iter_time=9.460e-05, forward_time=0.143, loss_ctc=66.156, loss_att=54.034, acc=0.715, loss=57.671, backward_time=1.023, grad_norm=119.804, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.665e-05, train_time=2.710 +[gpub002:0/64] 2023-07-12 17:50:07,301 (trainer:732) INFO: 40epoch:train:9601-9700batch: iter_time=9.742e-05, forward_time=0.143, loss_ctc=69.405, loss_att=50.896, acc=0.723, loss=56.449, backward_time=1.024, grad_norm=105.723, 
clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.664e-05, train_time=2.709 +[gpub002:0/64] 2023-07-12 17:52:22,352 (trainer:732) INFO: 40epoch:train:9701-9800batch: iter_time=1.037e-04, forward_time=0.142, loss_ctc=62.756, loss_att=43.537, acc=0.712, loss=49.303, backward_time=1.022, grad_norm=114.282, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.663e-05, train_time=2.701 +[gpub002:0/64] 2023-07-12 17:54:44,234 (trainer:732) INFO: 40epoch:train:9801-9900batch: iter_time=9.713e-05, forward_time=0.143, loss_ctc=67.843, loss_att=50.651, acc=0.721, loss=55.809, backward_time=1.030, grad_norm=112.352, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.663e-05, train_time=2.837 +[gpub002:0/64] 2023-07-12 17:57:01,502 (trainer:732) INFO: 40epoch:train:9901-10000batch: iter_time=9.865e-05, forward_time=0.141, loss_ctc=69.656, loss_att=53.667, acc=0.714, loss=58.464, backward_time=1.028, grad_norm=130.803, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.662e-05, train_time=2.745 +[gpub002:0/64] 2023-07-12 18:11:36,573 (trainer:338) INFO: 40epoch results: [train] iter_time=0.178, forward_time=0.145, loss_ctc=68.748, loss_att=51.163, acc=0.712, loss=56.438, backward_time=1.027, grad_norm=116.730, clip=100.000, loss_scale=3.679e+32, optim_step_time=0.180, optim0_lr0=5.698e-05, train_time=3.327, time=4 hours, 37 minutes and 31.92 seconds, total_count=370000, gpu_max_cached_mem_GB=34.277, [valid] loss_ctc=44.137, cer_ctc=0.263, loss_att=39.500, acc=0.667, cer=0.428, wer=1.000, loss=40.891, time=7 minutes and 44.49 seconds, total_count=37950, gpu_max_cached_mem_GB=37.572, [att_plot] time=6 minutes and 32.9 seconds, total_count=0, gpu_max_cached_mem_GB=37.572 +[gpub002:0/64] 2023-07-12 18:11:52,961 (trainer:386) INFO: The best model has been updated: valid.total_count +[gpub002:0/64] 2023-07-12 18:11:53,003 (average_nbest_models:69) INFO: Averaging 5best models: criterion="valid.acc": exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/valid.acc.ave_5best.till40epoch.pth +[gpub002:0/64] 2023-07-12 18:12:44,008 (average_nbest_models:69) INFO: Averaging 5best models: criterion="valid.total_count": exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/valid.total_count.ave_5best.till40epoch.pth +[gpub002:0/64] 2023-07-12 18:13:08,270 (trainer:440) INFO: The model files were removed: exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/35epoch.pth +[gpub002:0/64] 2023-07-12 18:13:08,326 (trainer:272) INFO: 41/50epoch started. Estimated time to finish: 2 days, 53 minutes and 30.81 seconds +[gpub002:0/64] 2023-07-12 18:13:09,583 (multiple_iter_factory:32) INFO: Building 0th iter-factory... 
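Note: the epoch-40 summary above makes the loss composition directly checkable: the reported loss is a weighted interpolation of loss_ctc and loss_att, and every logged window fits a 0.3/0.7 split (e.g. 0.3 * 62.142 + 0.7 * 45.987 = 50.833 for the first window of the epoch). The actual ctc_weight lives in the training config, which this log never prints, so the 0.3 below is inferred from the numbers rather than quoted from a setting. A quick check in Python:

# values copied from the epoch-40 [train] summary above
loss_ctc, loss_att, loss = 68.748, 51.163, 56.438
ctc_weight = 0.3                                   # inferred, not read from the config
recon = ctc_weight * loss_ctc + (1.0 - ctc_weight) * loss_att
assert abs(recon - loss) < 1e-2                    # 56.4385 vs. the logged 56.438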
+[gpub002:0/64] 2023-07-12 18:13:27,325 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-12 18:13:30,751 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-12 18:13:30,751 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1, +[gpub002:0/64] 2023-07-12 18:13:30,878 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-12 18:21:17,615 (trainer:732) INFO: 41epoch:train:1-100batch: iter_time=3.473, forward_time=0.171, loss_ctc=70.840, loss_att=55.420, acc=0.704, loss=60.046, backward_time=1.043, grad_norm=129.418, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.185, optim0_lr0=5.661e-05, train_time=9.772 +[gpub002:0/64] 2023-07-12 18:23:33,343 (trainer:732) INFO: 41epoch:train:101-200batch: iter_time=1.284e-04, forward_time=0.146, loss_ctc=67.748, loss_att=50.057, acc=0.705, loss=55.364, backward_time=1.028, grad_norm=114.263, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.660e-05, train_time=2.714 +[gpub002:0/64] 2023-07-12 18:25:50,201 (trainer:732) INFO: 41epoch:train:201-300batch: iter_time=1.232e-04, forward_time=0.152, loss_ctc=66.301, loss_att=51.918, acc=0.721, loss=56.233, backward_time=1.028, grad_norm=123.052, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.660e-05, train_time=2.737 +[gpub002:0/64] 2023-07-12 18:28:19,026 (trainer:732) INFO: 41epoch:train:301-400batch: iter_time=1.049e-04, forward_time=0.145, loss_ctc=75.210, loss_att=59.758, acc=0.716, loss=64.394, backward_time=1.054, grad_norm=126.516, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.659e-05, train_time=2.976 +[gpub002:0/64] 2023-07-12 18:30:40,311 (trainer:732) INFO: 41epoch:train:401-500batch: iter_time=1.063e-04, forward_time=0.146, loss_ctc=60.935, loss_att=47.224, acc=0.714, loss=51.338, backward_time=1.039, grad_norm=134.990, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.658e-05, train_time=2.825 +[gpub002:0/64] 2023-07-12 18:32:59,302 (trainer:732) INFO: 41epoch:train:501-600batch: iter_time=1.050e-04, forward_time=0.147, loss_ctc=75.633, loss_att=61.020, acc=0.718, loss=65.404, backward_time=1.032, grad_norm=146.761, clip=100.000, loss_scale=3.894e+32, optim_step_time=0.182, optim0_lr0=5.657e-05, train_time=2.780 +[gpub002:0/64] 2023-07-12 18:35:20,968 (trainer:732) INFO: 41epoch:train:601-700batch: iter_time=1.067e-04, forward_time=0.145, loss_ctc=70.522, loss_att=53.175, acc=0.708, loss=58.379, backward_time=1.040, grad_norm=114.115, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.657e-05, train_time=2.833 +[gpub002:0/64] 2023-07-12 18:37:47,401 (trainer:732) INFO: 41epoch:train:701-800batch: iter_time=1.123e-04, forward_time=0.145, loss_ctc=71.912, loss_att=54.658, acc=0.713, loss=59.834, backward_time=1.046, grad_norm=114.837, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.656e-05, 
train_time=2.928 +[gpub002:0/64] 2023-07-12 18:37:49,975 (trainer:663) WARNING: The grad norm is nan. Skipping updating the model. +[gpub002:0/64] 2023-07-12 18:38:40,955 (multiple_iter_factory:32) INFO: Building 1th iter-factory... +[gpub002:0/64] 2023-07-12 18:38:58,926 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-12 18:39:02,533 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-12 18:39:02,533 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3, +[gpub002:0/64] 2023-07-12 18:39:02,539 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-12 18:44:47,143 (trainer:732) INFO: 41epoch:train:801-900batch: iter_time=1.647, forward_time=0.146, loss_ctc=72.599, loss_att=54.115, acc=0.703, loss=59.660, backward_time=1.040, grad_norm=128.541, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.655e-05, train_time=8.395 +[gpub002:0/64] 2023-07-12 18:45:06,195 (trainer:663) WARNING: The grad norm is nan. Skipping updating the model. +[gpub002:0/64] 2023-07-12 18:47:03,912 (trainer:732) INFO: 41epoch:train:901-1000batch: iter_time=1.239e-04, forward_time=0.145, loss_ctc=70.940, loss_att=54.535, acc=0.709, loss=59.457, backward_time=1.029, grad_norm=120.295, clip=100.000, loss_scale=1.821e+32, optim_step_time=0.182, optim0_lr0=5.655e-05, train_time=2.735 +[gpub002:0/64] 2023-07-12 18:49:19,533 (trainer:732) INFO: 41epoch:train:1001-1100batch: iter_time=1.115e-04, forward_time=0.145, loss_ctc=61.609, loss_att=44.306, acc=0.724, loss=49.497, backward_time=1.027, grad_norm=111.947, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.654e-05, train_time=2.712 +[gpub002:0/64] 2023-07-12 18:51:36,281 (trainer:732) INFO: 41epoch:train:1101-1200batch: iter_time=1.166e-04, forward_time=0.146, loss_ctc=73.032, loss_att=63.628, acc=0.705, loss=66.449, backward_time=1.034, grad_norm=164.160, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.653e-05, train_time=2.735 +[gpub002:0/64] 2023-07-12 18:53:52,356 (trainer:732) INFO: 41epoch:train:1201-1300batch: iter_time=1.133e-04, forward_time=0.146, loss_ctc=68.936, loss_att=48.977, acc=0.729, loss=54.965, backward_time=1.030, grad_norm=118.209, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.652e-05, train_time=2.721 +[gpub002:0/64] 2023-07-12 18:56:08,340 (trainer:732) INFO: 41epoch:train:1301-1400batch: iter_time=1.178e-04, forward_time=0.145, loss_ctc=72.457, loss_att=57.311, acc=0.711, loss=61.854, backward_time=1.030, grad_norm=123.795, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.652e-05, train_time=2.719 +[gpub002:0/64] 2023-07-12 18:58:24,327 (trainer:732) INFO: 41epoch:train:1401-1500batch: iter_time=1.172e-04, forward_time=0.145, loss_ctc=69.223, loss_att=53.363, acc=0.719, loss=58.121, backward_time=1.030, grad_norm=100.726, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, 
optim0_lr0=5.651e-05, train_time=2.720 +[gpub002:0/64] 2023-07-12 19:00:40,397 (trainer:732) INFO: 41epoch:train:1501-1600batch: iter_time=1.334e-04, forward_time=0.146, loss_ctc=71.626, loss_att=57.110, acc=0.711, loss=61.465, backward_time=1.031, grad_norm=128.152, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.650e-05, train_time=2.721 +[gpub002:0/64] 2023-07-12 19:02:11,569 (multiple_iter_factory:32) INFO: Building 2th iter-factory... +[gpub002:0/64] 2023-07-12 19:02:29,844 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-12 19:02:33,282 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-12 19:02:33,282 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4, +[gpub002:0/64] 2023-07-12 19:02:33,289 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-12 19:08:06,638 (trainer:732) INFO: 41epoch:train:1601-1700batch: iter_time=1.676, forward_time=0.169, loss_ctc=70.421, loss_att=49.398, acc=0.710, loss=55.705, backward_time=1.041, grad_norm=138.098, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.183, optim0_lr0=5.650e-05, train_time=8.923 +[gpub002:0/64] 2023-07-12 19:10:23,384 (trainer:732) INFO: 41epoch:train:1701-1800batch: iter_time=1.274e-04, forward_time=0.146, loss_ctc=69.158, loss_att=55.267, acc=0.709, loss=59.434, backward_time=1.031, grad_norm=104.995, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.649e-05, train_time=2.736 +[gpub002:0/64] 2023-07-12 19:12:38,917 (trainer:732) INFO: 41epoch:train:1801-1900batch: iter_time=1.230e-04, forward_time=0.145, loss_ctc=66.374, loss_att=50.597, acc=0.699, loss=55.330, backward_time=1.027, grad_norm=112.628, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.648e-05, train_time=2.710 +[gpub002:0/64] 2023-07-12 19:14:54,622 (trainer:732) INFO: 41epoch:train:1901-2000batch: iter_time=1.230e-04, forward_time=0.145, loss_ctc=68.154, loss_att=57.325, acc=0.709, loss=60.574, backward_time=1.027, grad_norm=103.678, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.647e-05, train_time=2.714 +[gpub002:0/64] 2023-07-12 19:17:10,478 (trainer:732) INFO: 41epoch:train:2001-2100batch: iter_time=1.035e-04, forward_time=0.145, loss_ctc=72.381, loss_att=54.045, acc=0.729, loss=59.546, backward_time=1.027, grad_norm=114.921, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.647e-05, train_time=2.717 +[gpub002:0/64] 2023-07-12 19:19:26,375 (trainer:732) INFO: 41epoch:train:2101-2200batch: iter_time=1.268e-04, forward_time=0.145, loss_ctc=68.777, loss_att=54.166, acc=0.702, loss=58.549, backward_time=1.028, grad_norm=113.943, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.646e-05, train_time=2.718 +[gpub002:0/64] 2023-07-12 19:21:42,063 (trainer:732) INFO: 41epoch:train:2201-2300batch: iter_time=1.302e-04, forward_time=0.146, loss_ctc=63.800, 
loss_att=49.285, acc=0.717, loss=53.640, backward_time=1.027, grad_norm=146.299, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.183, optim0_lr0=5.645e-05, train_time=2.714 +[gpub002:0/64] 2023-07-12 19:23:57,840 (trainer:732) INFO: 41epoch:train:2301-2400batch: iter_time=1.250e-04, forward_time=0.145, loss_ctc=74.630, loss_att=56.323, acc=0.712, loss=61.815, backward_time=1.028, grad_norm=111.859, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.183, optim0_lr0=5.644e-05, train_time=2.715 +[gpub002:0/64] 2023-07-12 19:26:19,254 (multiple_iter_factory:32) INFO: Building 3th iter-factory... +[gpub002:0/64] 2023-07-12 19:26:37,400 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-12 19:26:40,808 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-12 19:26:40,808 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0, +[gpub002:0/64] 2023-07-12 19:26:40,814 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-12 19:29:54,444 (trainer:732) INFO: 41epoch:train:2401-2500batch: iter_time=2.164, forward_time=0.165, loss_ctc=73.293, loss_att=57.191, acc=0.704, loss=62.022, backward_time=1.035, grad_norm=154.823, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.183, optim0_lr0=5.644e-05, train_time=7.132 +[gpub002:0/64] 2023-07-12 19:32:11,981 (trainer:732) INFO: 41epoch:train:2501-2600batch: iter_time=1.086e-04, forward_time=0.145, loss_ctc=70.299, loss_att=55.298, acc=0.703, loss=59.798, backward_time=1.037, grad_norm=109.577, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.643e-05, train_time=2.751 +[gpub002:0/64] 2023-07-12 19:34:27,583 (trainer:732) INFO: 41epoch:train:2601-2700batch: iter_time=1.227e-04, forward_time=0.145, loss_ctc=65.933, loss_att=48.206, acc=0.704, loss=53.524, backward_time=1.028, grad_norm=91.627, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.642e-05, train_time=2.712 +[gpub002:0/64] 2023-07-12 19:36:43,388 (trainer:732) INFO: 41epoch:train:2701-2800batch: iter_time=1.208e-04, forward_time=0.147, loss_ctc=64.547, loss_att=51.693, acc=0.716, loss=55.549, backward_time=1.027, grad_norm=123.357, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.642e-05, train_time=2.716 +[gpub002:0/64] 2023-07-12 19:38:59,319 (trainer:732) INFO: 41epoch:train:2801-2900batch: iter_time=1.147e-04, forward_time=0.145, loss_ctc=74.813, loss_att=58.932, acc=0.715, loss=63.696, backward_time=1.030, grad_norm=122.567, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.641e-05, train_time=2.718 +[gpub002:0/64] 2023-07-12 19:41:15,045 (trainer:732) INFO: 41epoch:train:2901-3000batch: iter_time=1.152e-04, forward_time=0.145, loss_ctc=61.440, loss_att=45.559, acc=0.725, loss=50.324, backward_time=1.028, grad_norm=124.359, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.640e-05, train_time=2.714 +[gpub002:0/64] 
2023-07-12 19:43:31,121 (trainer:732) INFO: 41epoch:train:3001-3100batch: iter_time=1.234e-04, forward_time=0.146, loss_ctc=73.113, loss_att=58.408, acc=0.712, loss=62.819, backward_time=1.028, grad_norm=132.870, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.639e-05, train_time=2.721 +[gpub002:0/64] 2023-07-12 19:45:49,866 (trainer:732) INFO: 41epoch:train:3101-3200batch: iter_time=1.274e-04, forward_time=0.145, loss_ctc=70.690, loss_att=51.235, acc=0.716, loss=57.071, backward_time=1.033, grad_norm=113.301, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.639e-05, train_time=2.775 +[gpub002:0/64] 2023-07-12 19:48:12,230 (trainer:732) INFO: 41epoch:train:3201-3300batch: iter_time=1.179e-04, forward_time=0.145, loss_ctc=72.441, loss_att=57.254, acc=0.711, loss=61.810, backward_time=1.033, grad_norm=107.815, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.638e-05, train_time=2.847 +[gpub002:0/64] 2023-07-12 19:49:02,365 (multiple_iter_factory:32) INFO: Building 4th iter-factory... +[gpub002:0/64] 2023-07-12 19:49:20,319 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-12 19:49:23,731 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-12 19:49:23,731 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7, +[gpub002:0/64] 2023-07-12 19:49:23,737 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-12 19:54:17,214 (trainer:732) INFO: 41epoch:train:3301-3400batch: iter_time=1.844, forward_time=0.147, loss_ctc=67.576, loss_att=49.521, acc=0.713, loss=54.938, backward_time=1.043, grad_norm=133.347, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.637e-05, train_time=7.299 +[gpub002:0/64] 2023-07-12 19:56:33,870 (trainer:732) INFO: 41epoch:train:3401-3500batch: iter_time=1.057e-04, forward_time=0.146, loss_ctc=70.531, loss_att=54.713, acc=0.713, loss=59.458, backward_time=1.030, grad_norm=130.288, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.637e-05, train_time=2.733 +[gpub002:0/64] 2023-07-12 19:58:49,517 (trainer:732) INFO: 41epoch:train:3501-3600batch: iter_time=1.089e-04, forward_time=0.145, loss_ctc=60.897, loss_att=42.699, acc=0.730, loss=48.158, backward_time=1.028, grad_norm=137.043, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.636e-05, train_time=2.713 +[gpub002:0/64] 2023-07-12 20:01:05,892 (trainer:732) INFO: 41epoch:train:3601-3700batch: iter_time=1.232e-04, forward_time=0.146, loss_ctc=74.026, loss_att=64.166, acc=0.709, loss=67.124, backward_time=1.032, grad_norm=127.815, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.635e-05, train_time=2.727 +[gpub002:0/64] 2023-07-12 20:03:21,618 (trainer:732) INFO: 41epoch:train:3701-3800batch: iter_time=1.125e-04, forward_time=0.145, loss_ctc=67.233, loss_att=48.554, acc=0.733, loss=54.158, backward_time=1.028, 
grad_norm=114.242, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.634e-05, train_time=2.714 +[gpub002:0/64] 2023-07-12 20:05:37,755 (trainer:732) INFO: 41epoch:train:3801-3900batch: iter_time=1.077e-04, forward_time=0.146, loss_ctc=70.919, loss_att=56.270, acc=0.718, loss=60.665, backward_time=1.031, grad_norm=122.264, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.634e-05, train_time=2.723 +[gpub002:0/64] 2023-07-12 20:07:53,980 (trainer:732) INFO: 41epoch:train:3901-4000batch: iter_time=1.098e-04, forward_time=0.146, loss_ctc=69.754, loss_att=53.402, acc=0.720, loss=58.308, backward_time=1.032, grad_norm=108.612, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.633e-05, train_time=2.724 +[gpub002:0/64] 2023-07-12 20:10:10,380 (trainer:732) INFO: 41epoch:train:4001-4100batch: iter_time=1.024e-04, forward_time=0.147, loss_ctc=70.275, loss_att=56.231, acc=0.716, loss=60.444, backward_time=1.032, grad_norm=117.778, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.632e-05, train_time=2.728 +[gpub002:0/64] 2023-07-12 20:11:39,912 (multiple_iter_factory:32) INFO: Building 5th iter-factory... +[gpub002:0/64] 2023-07-12 20:11:58,290 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-12 20:12:01,692 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-12 20:12:01,692 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8, +[gpub002:0/64] 2023-07-12 20:12:01,698 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-12 20:17:26,480 (trainer:732) INFO: 41epoch:train:4101-4200batch: iter_time=1.566, forward_time=0.146, loss_ctc=72.772, loss_att=55.012, acc=0.708, loss=60.340, backward_time=1.042, grad_norm=117.600, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.632e-05, train_time=8.722 +[gpub002:0/64] 2023-07-12 20:19:42,895 (trainer:732) INFO: 41epoch:train:4201-4300batch: iter_time=1.236e-04, forward_time=0.146, loss_ctc=67.515, loss_att=51.939, acc=0.704, loss=56.612, backward_time=1.030, grad_norm=123.653, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.631e-05, train_time=2.728 +[gpub002:0/64] 2023-07-12 20:21:58,413 (trainer:732) INFO: 41epoch:train:4301-4400batch: iter_time=1.320e-04, forward_time=0.145, loss_ctc=61.290, loss_att=47.503, acc=0.716, loss=51.639, backward_time=1.025, grad_norm=112.693, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.630e-05, train_time=2.710 +[gpub002:0/64] 2023-07-12 20:24:14,368 (trainer:732) INFO: 41epoch:train:4401-4500batch: iter_time=1.325e-04, forward_time=0.145, loss_ctc=71.825, loss_att=62.339, acc=0.697, loss=65.185, backward_time=1.029, grad_norm=117.005, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.629e-05, train_time=2.719 +[gpub002:0/64] 2023-07-12 20:26:30,216 (trainer:732) INFO: 
41epoch:train:4501-4600batch: iter_time=1.354e-04, forward_time=0.145, loss_ctc=64.044, loss_att=44.250, acc=0.731, loss=50.189, backward_time=1.028, grad_norm=95.630, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.629e-05, train_time=2.717 +[gpub002:0/64] 2023-07-12 20:28:46,011 (trainer:732) INFO: 41epoch:train:4601-4700batch: iter_time=1.354e-04, forward_time=0.144, loss_ctc=72.970, loss_att=56.752, acc=0.705, loss=61.617, backward_time=1.029, grad_norm=111.368, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.628e-05, train_time=2.716 +[gpub002:0/64] 2023-07-12 20:31:02,650 (trainer:732) INFO: 41epoch:train:4701-4800batch: iter_time=1.450e-04, forward_time=0.144, loss_ctc=71.101, loss_att=53.521, acc=0.717, loss=58.795, backward_time=1.029, grad_norm=100.929, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.627e-05, train_time=2.733 +[gpub002:0/64] 2023-07-12 20:33:18,834 (trainer:732) INFO: 41epoch:train:4801-4900batch: iter_time=1.218e-04, forward_time=0.146, loss_ctc=72.523, loss_att=58.663, acc=0.707, loss=62.821, backward_time=1.031, grad_norm=127.161, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.627e-05, train_time=2.723 +[gpub002:0/64] 2023-07-12 20:35:36,747 (multiple_iter_factory:32) INFO: Building 6th iter-factory... +[gpub002:0/64] 2023-07-12 20:35:54,833 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-12 20:35:58,239 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-12 20:35:58,239 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9, +[gpub002:0/64] 2023-07-12 20:35:58,246 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-12 20:41:04,776 (trainer:732) INFO: 41epoch:train:4901-5000batch: iter_time=1.597, forward_time=0.146, loss_ctc=69.320, loss_att=46.915, acc=0.725, loss=53.637, backward_time=1.038, grad_norm=113.892, clip=100.000, loss_scale=3.018e+32, optim_step_time=0.182, optim0_lr0=5.626e-05, train_time=9.319 +[gpub002:0/64] 2023-07-12 20:43:22,964 (trainer:732) INFO: 41epoch:train:5001-5100batch: iter_time=1.259e-04, forward_time=0.146, loss_ctc=68.557, loss_att=53.531, acc=0.716, loss=58.039, backward_time=1.036, grad_norm=109.852, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.625e-05, train_time=2.764 +[gpub002:0/64] 2023-07-12 20:45:38,905 (trainer:732) INFO: 41epoch:train:5101-5200batch: iter_time=1.167e-04, forward_time=0.146, loss_ctc=65.544, loss_att=48.308, acc=0.716, loss=53.479, backward_time=1.029, grad_norm=123.663, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.624e-05, train_time=2.719 +[gpub002:0/64] 2023-07-12 20:47:55,428 (trainer:732) INFO: 41epoch:train:5201-5300batch: iter_time=9.594e-05, forward_time=0.147, loss_ctc=69.697, loss_att=59.000, acc=0.717, loss=62.209, backward_time=1.031, grad_norm=109.695, clip=100.000, 
loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.624e-05, train_time=2.730 +[gpub002:0/64] 2023-07-12 20:50:11,373 (trainer:732) INFO: 41epoch:train:5301-5400batch: iter_time=1.157e-04, forward_time=0.145, loss_ctc=71.695, loss_att=51.848, acc=0.732, loss=57.802, backward_time=1.029, grad_norm=105.656, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.623e-05, train_time=2.719 +[gpub002:0/64] 2023-07-12 20:52:26,937 (trainer:732) INFO: 41epoch:train:5401-5500batch: iter_time=1.158e-04, forward_time=0.145, loss_ctc=69.713, loss_att=56.136, acc=0.698, loss=60.209, backward_time=1.026, grad_norm=124.763, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.622e-05, train_time=2.711 +[gpub002:0/64] 2023-07-12 20:54:42,808 (trainer:732) INFO: 41epoch:train:5501-5600batch: iter_time=1.183e-04, forward_time=0.145, loss_ctc=65.673, loss_att=49.982, acc=0.728, loss=54.690, backward_time=1.028, grad_norm=121.869, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.622e-05, train_time=2.717 +[gpub002:0/64] 2023-07-12 20:56:58,543 (trainer:732) INFO: 41epoch:train:5601-5700batch: iter_time=1.085e-04, forward_time=0.145, loss_ctc=72.242, loss_att=52.803, acc=0.723, loss=58.635, backward_time=1.027, grad_norm=111.473, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.621e-05, train_time=2.714 +[gpub002:0/64] 2023-07-12 20:59:14,728 (trainer:732) INFO: 41epoch:train:5701-5800batch: iter_time=9.577e-05, forward_time=0.145, loss_ctc=69.925, loss_att=54.850, acc=0.714, loss=59.373, backward_time=1.031, grad_norm=113.233, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.620e-05, train_time=2.723 +[gpub002:0/64] 2023-07-12 21:00:01,150 (multiple_iter_factory:32) INFO: Building 7th iter-factory... 
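The recurring "[train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129" lines in the iter-factory blocks are consistent with a sampler that fixes the batch count at len(keys) // batch_size and spreads the remainder across the batches, so a handful of batches carry 129 utterances instead of 128. A minimal sketch of that arithmetic, assuming this simple scheme (not the actual espnet2 UnsortedBatchSampler, whose details may differ):

```python
def make_batches(keys, batch_size=128):
    # fix the number of batches, then spread the remainder over them
    n_batch = max(len(keys) // batch_size, 1)
    base, extra = divmod(len(keys), n_batch)
    batches, start = [], 0
    for i in range(n_batch):
        size = base + (1 if i < extra else 0)   # first `extra` batches get +1
        batches.append(keys[start:start + size])
        start += size
    return batches

# hypothetical key count: 20 utterances more than an exact multiple of 128
sizes = [len(b) for b in make_batches(range(37994 * 128 + 20))]
print(len(sizes), min(sizes), max(sizes))   # -> 37994 128 129
```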
+[gpub002:0/64] 2023-07-12 21:00:19,189 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-12 21:00:22,555 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-12 21:00:22,555 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11, +[gpub002:0/64] 2023-07-12 21:00:22,561 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-12 21:05:30,215 (trainer:732) INFO: 41epoch:train:5801-5900batch: iter_time=1.645, forward_time=0.193, loss_ctc=72.625, loss_att=53.896, acc=0.723, loss=59.515, backward_time=1.042, grad_norm=126.902, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.184, optim0_lr0=5.619e-05, train_time=7.509 +[gpub002:0/64] 2023-07-12 21:07:46,821 (trainer:732) INFO: 41epoch:train:5901-6000batch: iter_time=1.429e-04, forward_time=0.147, loss_ctc=66.989, loss_att=49.786, acc=0.709, loss=54.947, backward_time=1.029, grad_norm=130.905, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.619e-05, train_time=2.733 +[gpub002:0/64] 2023-07-12 21:10:03,539 (trainer:732) INFO: 41epoch:train:6001-6100batch: iter_time=1.134e-04, forward_time=0.149, loss_ctc=64.189, loss_att=51.518, acc=0.726, loss=55.319, backward_time=1.031, grad_norm=103.534, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.618e-05, train_time=2.734 +[gpub002:0/64] 2023-07-12 21:12:19,396 (trainer:732) INFO: 41epoch:train:6101-6200batch: iter_time=1.098e-04, forward_time=0.146, loss_ctc=72.559, loss_att=55.954, acc=0.724, loss=60.936, backward_time=1.029, grad_norm=122.019, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.617e-05, train_time=2.717 +[gpub002:0/64] 2023-07-12 21:14:35,307 (trainer:732) INFO: 41epoch:train:6201-6300batch: iter_time=1.148e-04, forward_time=0.146, loss_ctc=64.191, loss_att=50.498, acc=0.713, loss=54.606, backward_time=1.028, grad_norm=114.048, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.617e-05, train_time=2.718 +[gpub002:0/64] 2023-07-12 21:15:07,857 (trainer:663) WARNING: The grad norm is nan. Skipping updating the model. +[gpub002:0/64] 2023-07-12 21:16:51,122 (trainer:732) INFO: 41epoch:train:6301-6400batch: iter_time=1.005e-04, forward_time=0.146, loss_ctc=70.103, loss_att=54.476, acc=0.732, loss=59.164, backward_time=1.030, grad_norm=103.656, clip=100.000, loss_scale=1.987e+32, optim_step_time=0.183, optim0_lr0=5.616e-05, train_time=2.716 +[gpub002:0/64] 2023-07-12 21:17:42,514 (trainer:663) WARNING: The grad norm is nan. Skipping updating the model. 
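The two "grad norm is nan" warnings above, and the drop of the reported loss_scale from 3.245e+32 to 8.113e+31 over the following windows (the printed value is a 100-batch window average, so each halving shows up gradually), follow the usual mixed-precision recipe: skip the optimizer step when gradients go non-finite and halve the dynamic loss scale. A minimal sketch of that recipe with torch.cuda.amp.GradScaler; ESPnet's trainer differs in detail:

```python
import torch

model = torch.nn.Linear(80, 4)                     # stand-in model
optim = torch.optim.AdamW(model.parameters(), lr=5.6e-5)
scaler = torch.cuda.amp.GradScaler()               # owns the dynamic loss_scale

def train_step(x, y):
    optim.zero_grad()
    loss = torch.nn.functional.mse_loss(model(x), y)
    scaler.scale(loss).backward()                  # backward through the scaled loss
    scaler.unscale_(optim)                         # unscale so clipping sees true grads
    # clip_grad_norm_ returns the pre-clip total norm -- the grad_norm figure
    # in the log -- clipped against the clip=100.000 threshold
    grad_norm = torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=100.0)
    if not torch.isfinite(grad_norm):
        print("WARNING: The grad norm is nan. Skipping updating the model.")
    scaler.step(optim)   # skips the update internally when grads are inf/nan
    scaler.update()      # halves the loss scale after a skipped step
    return grad_norm

train_step(torch.randn(8, 80), torch.randn(8, 4))
```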
+[gpub002:0/64] 2023-07-12 21:19:06,809 (trainer:732) INFO: 41epoch:train:6401-6500batch: iter_time=1.398e-04, forward_time=0.146, loss_ctc=70.713, loss_att=52.907, acc=0.719, loss=58.249, backward_time=1.028, grad_norm=129.552, clip=100.000, loss_scale=1.109e+32, optim_step_time=0.182, optim0_lr0=5.615e-05, train_time=2.714 +[gpub002:0/64] 2023-07-12 21:21:23,047 (trainer:732) INFO: 41epoch:train:6501-6600batch: iter_time=1.335e-04, forward_time=0.147, loss_ctc=71.542, loss_att=55.074, acc=0.720, loss=60.015, backward_time=1.031, grad_norm=119.497, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.615e-05, train_time=2.725 +[gpub002:0/64] 2023-07-12 21:23:04,288 (multiple_iter_factory:32) INFO: Building 8th iter-factory... +[gpub002:0/64] 2023-07-12 21:23:22,599 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-12 21:23:26,047 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-12 21:23:26,047 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10, +[gpub002:0/64] 2023-07-12 21:23:26,053 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-12 21:28:01,974 (trainer:732) INFO: 41epoch:train:6601-6700batch: iter_time=2.543, forward_time=0.146, loss_ctc=71.485, loss_att=51.860, acc=0.717, loss=57.748, backward_time=1.039, grad_norm=121.242, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.614e-05, train_time=7.978 +[gpub002:0/64] 2023-07-12 21:30:19,098 (trainer:732) INFO: 41epoch:train:6701-6800batch: iter_time=1.407e-04, forward_time=0.145, loss_ctc=67.783, loss_att=52.647, acc=0.706, loss=57.188, backward_time=1.030, grad_norm=102.934, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.613e-05, train_time=2.742 +[gpub002:0/64] 2023-07-12 21:32:35,143 (trainer:732) INFO: 41epoch:train:6801-6900batch: iter_time=1.370e-04, forward_time=0.149, loss_ctc=60.432, loss_att=44.809, acc=0.723, loss=49.496, backward_time=1.030, grad_norm=143.111, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.183, optim0_lr0=5.612e-05, train_time=2.721 +[gpub002:0/64] 2023-07-12 21:34:51,046 (trainer:732) INFO: 41epoch:train:6901-7000batch: iter_time=1.716e-04, forward_time=0.147, loss_ctc=73.656, loss_att=65.329, acc=0.698, loss=67.827, backward_time=1.029, grad_norm=127.853, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.183, optim0_lr0=5.612e-05, train_time=2.718 +[gpub002:0/64] 2023-07-12 21:37:06,844 (trainer:732) INFO: 41epoch:train:7001-7100batch: iter_time=1.519e-04, forward_time=0.147, loss_ctc=63.569, loss_att=45.390, acc=0.737, loss=50.844, backward_time=1.028, grad_norm=115.348, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.183, optim0_lr0=5.611e-05, train_time=2.716 +[gpub002:0/64] 2023-07-12 21:39:22,832 (trainer:732) INFO: 41epoch:train:7101-7200batch: iter_time=1.483e-04, forward_time=0.147, loss_ctc=73.348, loss_att=57.141, acc=0.707, loss=62.003, 
backward_time=1.030, grad_norm=125.156, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.183, optim0_lr0=5.610e-05, train_time=2.720 +[gpub002:0/64] 2023-07-12 21:41:38,634 (trainer:732) INFO: 41epoch:train:7201-7300batch: iter_time=1.409e-04, forward_time=0.145, loss_ctc=68.433, loss_att=52.831, acc=0.715, loss=57.512, backward_time=1.026, grad_norm=117.453, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.610e-05, train_time=2.716 +[gpub002:0/64] 2023-07-12 21:43:54,806 (trainer:732) INFO: 41epoch:train:7301-7400batch: iter_time=1.454e-04, forward_time=0.148, loss_ctc=69.535, loss_att=57.942, acc=0.709, loss=61.420, backward_time=1.031, grad_norm=142.520, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.609e-05, train_time=2.723 +[gpub002:0/64] 2023-07-12 21:46:12,782 (trainer:732) INFO: 41epoch:train:7401-7500batch: iter_time=1.376e-04, forward_time=0.147, loss_ctc=67.374, loss_att=45.684, acc=0.729, loss=52.191, backward_time=1.031, grad_norm=135.947, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.608e-05, train_time=2.759 +[gpub002:0/64] 2023-07-12 21:46:15,854 (multiple_iter_factory:32) INFO: Building 9th iter-factory... +[gpub002:0/64] 2023-07-12 21:46:34,163 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-12 21:46:37,582 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-12 21:46:37,582 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6, +[gpub002:0/64] 2023-07-12 21:46:37,588 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-12 21:54:09,248 (trainer:732) INFO: 41epoch:train:7501-7600batch: iter_time=1.603, forward_time=0.145, loss_ctc=71.396, loss_att=58.603, acc=0.700, loss=62.441, backward_time=1.042, grad_norm=141.498, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.607e-05, train_time=9.529 +[gpub002:0/64] 2023-07-12 21:56:25,789 (trainer:732) INFO: 41epoch:train:7601-7700batch: iter_time=1.471e-04, forward_time=0.144, loss_ctc=66.114, loss_att=48.625, acc=0.707, loss=53.872, backward_time=1.029, grad_norm=119.762, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.607e-05, train_time=2.731 +[gpub002:0/64] 2023-07-12 21:58:41,647 (trainer:732) INFO: 41epoch:train:7701-7800batch: iter_time=1.349e-04, forward_time=0.145, loss_ctc=63.466, loss_att=50.427, acc=0.716, loss=54.339, backward_time=1.028, grad_norm=113.922, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.606e-05, train_time=2.717 +[gpub002:0/64] 2023-07-12 22:00:59,471 (trainer:732) INFO: 41epoch:train:7801-7900batch: iter_time=1.247e-04, forward_time=0.147, loss_ctc=72.326, loss_att=56.756, acc=0.720, loss=61.427, backward_time=1.031, grad_norm=109.457, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.605e-05, train_time=2.756 +[gpub002:0/64] 2023-07-12 22:03:19,497 (trainer:732) INFO: 
41epoch:train:7901-8000batch: iter_time=1.212e-04, forward_time=0.145, loss_ctc=60.819, loss_att=44.765, acc=0.722, loss=49.581, backward_time=1.033, grad_norm=106.824, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.605e-05, train_time=2.800 +[gpub002:0/64] 2023-07-12 22:05:35,694 (trainer:732) INFO: 41epoch:train:8001-8100batch: iter_time=1.268e-04, forward_time=0.146, loss_ctc=75.424, loss_att=59.810, acc=0.714, loss=64.495, backward_time=1.030, grad_norm=123.469, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.604e-05, train_time=2.724 +[gpub002:0/64] 2023-07-12 22:07:51,699 (trainer:732) INFO: 41epoch:train:8101-8200batch: iter_time=1.220e-04, forward_time=0.147, loss_ctc=69.288, loss_att=51.179, acc=0.716, loss=56.612, backward_time=1.029, grad_norm=117.088, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.603e-05, train_time=2.720 +[gpub002:0/64] 2023-07-12 22:10:07,627 (trainer:732) INFO: 41epoch:train:8201-8300batch: iter_time=1.312e-04, forward_time=0.145, loss_ctc=72.391, loss_att=55.439, acc=0.714, loss=60.525, backward_time=1.029, grad_norm=127.931, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.603e-05, train_time=2.718 +[gpub002:0/64] 2023-07-12 22:10:55,464 (multiple_iter_factory:32) INFO: Building 10th iter-factory... +[gpub002:0/64] 2023-07-12 22:11:13,825 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-12 22:11:17,494 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-12 22:11:17,494 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2, +[gpub002:0/64] 2023-07-12 22:11:17,500 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-12 22:18:38,342 (trainer:732) INFO: 41epoch:train:8301-8400batch: iter_time=1.615, forward_time=0.147, loss_ctc=67.249, loss_att=51.366, acc=0.704, loss=56.131, backward_time=1.046, grad_norm=129.432, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.602e-05, train_time=10.214 +[gpub002:0/64] 2023-07-12 22:20:55,423 (trainer:732) INFO: 41epoch:train:8401-8500batch: iter_time=9.569e-05, forward_time=0.145, loss_ctc=68.689, loss_att=52.556, acc=0.712, loss=57.396, backward_time=1.031, grad_norm=129.356, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.601e-05, train_time=2.741 +[gpub002:0/64] 2023-07-12 22:23:11,495 (trainer:732) INFO: 41epoch:train:8501-8600batch: iter_time=9.187e-05, forward_time=0.146, loss_ctc=62.059, loss_att=44.195, acc=0.721, loss=49.554, backward_time=1.028, grad_norm=111.154, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.600e-05, train_time=2.721 +[gpub002:0/64] 2023-07-12 22:25:27,686 (trainer:732) INFO: 41epoch:train:8601-8700batch: iter_time=1.103e-04, forward_time=0.146, loss_ctc=71.896, loss_att=62.165, acc=0.700, loss=65.084, backward_time=1.030, grad_norm=133.087, clip=100.000, 
loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.600e-05, train_time=2.724 +[gpub002:0/64] 2023-07-12 22:27:43,333 (trainer:732) INFO: 41epoch:train:8701-8800batch: iter_time=1.105e-04, forward_time=0.145, loss_ctc=68.306, loss_att=48.020, acc=0.733, loss=54.106, backward_time=1.027, grad_norm=109.698, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.599e-05, train_time=2.713 +[gpub002:0/64] 2023-07-12 22:29:58,990 (trainer:732) INFO: 41epoch:train:8801-8900batch: iter_time=1.056e-04, forward_time=0.145, loss_ctc=70.034, loss_att=54.974, acc=0.709, loss=59.492, backward_time=1.026, grad_norm=125.573, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.598e-05, train_time=2.713 +[gpub002:0/64] 2023-07-12 22:32:14,802 (trainer:732) INFO: 41epoch:train:8901-9000batch: iter_time=9.824e-05, forward_time=0.146, loss_ctc=70.258, loss_att=54.489, acc=0.715, loss=59.220, backward_time=1.028, grad_norm=114.409, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.598e-05, train_time=2.716 +[gpub002:0/64] 2023-07-12 22:34:31,075 (trainer:732) INFO: 41epoch:train:9001-9100batch: iter_time=1.091e-04, forward_time=0.146, loss_ctc=68.492, loss_att=54.068, acc=0.716, loss=58.395, backward_time=1.029, grad_norm=140.544, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.597e-05, train_time=2.725 +[gpub002:0/64] 2023-07-12 22:36:03,875 (multiple_iter_factory:32) INFO: Building 11th iter-factory... +[gpub002:0/64] 2023-07-12 22:36:22,352 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-12 22:36:26,145 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-12 22:36:26,145 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5, +[gpub002:0/64] 2023-07-12 22:36:26,151 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-12 22:42:43,033 (trainer:732) INFO: 41epoch:train:9101-9200batch: iter_time=1.881, forward_time=0.184, loss_ctc=68.469, loss_att=50.264, acc=0.718, loss=55.726, backward_time=1.044, grad_norm=106.621, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.183, optim0_lr0=5.596e-05, train_time=9.838 +[gpub002:0/64] 2023-07-12 22:45:00,788 (trainer:732) INFO: 41epoch:train:9201-9300batch: iter_time=1.293e-04, forward_time=0.148, loss_ctc=68.471, loss_att=53.732, acc=0.720, loss=58.153, backward_time=1.035, grad_norm=122.009, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.181, optim0_lr0=5.596e-05, train_time=2.755 +[gpub002:0/64] 2023-07-12 22:47:19,808 (trainer:732) INFO: 41epoch:train:9301-9400batch: iter_time=1.176e-04, forward_time=0.147, loss_ctc=65.914, loss_att=48.978, acc=0.714, loss=54.059, backward_time=1.037, grad_norm=109.971, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.181, optim0_lr0=5.595e-05, train_time=2.780 +[gpub002:0/64] 2023-07-12 22:49:36,096 (trainer:732) INFO: 41epoch:train:9401-9500batch: iter_time=1.167e-04, 
forward_time=0.146, loss_ctc=68.014, loss_att=54.562, acc=0.726, loss=58.597, backward_time=1.029, grad_norm=117.199, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.181, optim0_lr0=5.594e-05, train_time=2.726 +[gpub002:0/64] 2023-07-12 22:51:51,997 (trainer:732) INFO: 41epoch:train:9501-9600batch: iter_time=1.252e-04, forward_time=0.146, loss_ctc=71.249, loss_att=53.735, acc=0.733, loss=58.989, backward_time=1.029, grad_norm=138.178, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.181, optim0_lr0=5.593e-05, train_time=2.718 +[gpub002:0/64] 2023-07-12 22:54:07,882 (trainer:732) INFO: 41epoch:train:9601-9700batch: iter_time=1.247e-04, forward_time=0.147, loss_ctc=66.076, loss_att=52.907, acc=0.713, loss=56.858, backward_time=1.028, grad_norm=106.233, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.181, optim0_lr0=5.593e-05, train_time=2.717 +[gpub002:0/64] 2023-07-12 22:56:23,823 (trainer:732) INFO: 41epoch:train:9701-9800batch: iter_time=1.160e-04, forward_time=0.146, loss_ctc=62.985, loss_att=48.457, acc=0.729, loss=52.816, backward_time=1.029, grad_norm=112.368, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.181, optim0_lr0=5.592e-05, train_time=2.719 +[gpub002:0/64] 2023-07-12 22:58:40,560 (trainer:732) INFO: 41epoch:train:9801-9900batch: iter_time=1.181e-04, forward_time=0.146, loss_ctc=74.657, loss_att=55.990, acc=0.719, loss=61.590, backward_time=1.027, grad_norm=113.466, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.181, optim0_lr0=5.591e-05, train_time=2.735 +[gpub002:0/64] 2023-07-12 23:00:56,830 (trainer:732) INFO: 41epoch:train:9901-10000batch: iter_time=1.108e-04, forward_time=0.146, loss_ctc=72.076, loss_att=56.216, acc=0.714, loss=60.974, backward_time=1.030, grad_norm=131.622, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.181, optim0_lr0=5.591e-05, train_time=2.725 +[gpub002:0/64] 2023-07-12 23:18:11,678 (trainer:338) INFO: 41epoch results: [train] iter_time=0.233, forward_time=0.147, loss_ctc=69.121, loss_att=53.257, acc=0.715, loss=58.016, backward_time=1.032, grad_norm=120.506, clip=100.000, loss_scale=1.781e+32, optim_step_time=0.182, optim0_lr0=5.626e-05, train_time=3.453, time=4 hours, 48 minutes and 7.1 seconds, total_count=380000, gpu_max_cached_mem_GB=37.572, [valid] loss_ctc=44.125, cer_ctc=0.260, loss_att=39.542, acc=0.666, cer=0.430, wer=0.999, loss=40.917, time=8 minutes and 29.09 seconds, total_count=38962, gpu_max_cached_mem_GB=37.572, [att_plot] time=8 minutes and 26.85 seconds, total_count=0, gpu_max_cached_mem_GB=37.572 +[gpub002:0/64] 2023-07-12 23:18:30,669 (trainer:386) INFO: The best model has been updated: valid.total_count +[gpub002:0/64] 2023-07-12 23:18:30,800 (trainer:440) INFO: The model files were removed: exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/36epoch.pth +[gpub002:0/64] 2023-07-12 23:18:30,800 (trainer:272) INFO: 42/50epoch started. Estimated time to finish: 1 day, 20 hours and 54 minutes +[gpub002:0/64] 2023-07-12 23:18:31,369 (multiple_iter_factory:32) INFO: Building 0th iter-factory... 
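A quick consistency check on the 41epoch results line above: the reported loss is the weighted hybrid CTC/attention combination of loss_ctc and loss_att, and back-solving the logged triplets gives a CTC weight of 0.3 (assumed here rather than read from the training config):

```python
CTC_WEIGHT = 0.3   # assumption, back-solved from the logged triplets

def combined(loss_ctc: float, loss_att: float) -> float:
    return CTC_WEIGHT * loss_ctc + (1.0 - CTC_WEIGHT) * loss_att

# [train] averages from the 41epoch results line:
print(f"{combined(69.121, 53.257):.3f}")   # 58.016, as logged
# [valid] averages from the same line:
print(f"{combined(44.125, 39.542):.3f}")   # 40.917, as logged
```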
+[gpub002:0/64] 2023-07-12 23:18:49,582 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-12 23:18:55,229 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-12 23:18:55,229 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6, +[gpub002:0/64] 2023-07-12 23:18:55,791 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-12 23:37:37,107 (trainer:732) INFO: 42epoch:train:1-100batch: iter_time=8.724, forward_time=1.113, loss_ctc=71.175, loss_att=49.430, acc=0.718, loss=55.953, backward_time=1.227, grad_norm=114.213, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.228, optim0_lr0=5.590e-05, train_time=22.913 +[gpub002:0/64] 2023-07-12 23:42:11,727 (trainer:732) INFO: 42epoch:train:101-200batch: iter_time=0.006, forward_time=0.982, loss_ctc=69.942, loss_att=54.374, acc=0.696, loss=59.044, backward_time=1.246, grad_norm=121.482, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.288, optim0_lr0=5.589e-05, train_time=5.491 +[gpub002:0/64] 2023-07-12 23:47:05,839 (trainer:732) INFO: 42epoch:train:201-300batch: iter_time=0.017, forward_time=1.206, loss_ctc=61.801, loss_att=45.949, acc=0.719, loss=50.705, backward_time=1.243, grad_norm=123.757, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.240, optim0_lr0=5.589e-05, train_time=5.881 +[gpub002:0/64] 2023-07-12 23:50:49,717 (trainer:732) INFO: 42epoch:train:301-400batch: iter_time=0.004, forward_time=0.743, loss_ctc=73.412, loss_att=51.589, acc=0.721, loss=58.136, backward_time=1.151, grad_norm=125.779, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.222, optim0_lr0=5.588e-05, train_time=4.480 +[gpub002:0/64] 2023-07-12 23:54:19,195 (trainer:732) INFO: 42epoch:train:401-500batch: iter_time=0.012, forward_time=0.551, loss_ctc=82.982, loss_att=60.880, acc=0.688, loss=67.511, backward_time=1.134, grad_norm=124.069, clip=100.000, loss_scale=1.314e+32, optim_step_time=0.209, optim0_lr0=5.587e-05, train_time=4.189 +[gpub002:0/64] 2023-07-12 23:57:28,757 (trainer:732) INFO: 42epoch:train:501-600batch: iter_time=0.002, forward_time=0.518, loss_ctc=78.082, loss_att=61.663, acc=0.708, loss=66.589, backward_time=1.094, grad_norm=124.871, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.201, optim0_lr0=5.586e-05, train_time=3.791 +[gpub002:0/64] 2023-07-13 00:00:25,025 (trainer:732) INFO: 42epoch:train:601-700batch: iter_time=6.864e-04, forward_time=0.420, loss_ctc=79.441, loss_att=60.935, acc=0.691, loss=66.487, backward_time=1.078, grad_norm=128.577, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.199, optim0_lr0=5.586e-05, train_time=3.526 +[gpub002:0/64] 2023-07-13 00:03:02,236 (trainer:732) INFO: 42epoch:train:701-800batch: iter_time=4.233e-04, forward_time=0.282, loss_ctc=72.996, loss_att=53.014, acc=0.712, loss=59.009, backward_time=1.066, grad_norm=122.990, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.197, optim0_lr0=5.585e-05, train_time=3.144 
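The "42/50epoch started. Estimated time to finish: 1 day, 20 hours and 54 minutes" message above is roughly the remaining epoch count times the per-epoch wall time. Redoing that arithmetic from the epoch-41 timings alone lands near the logged figure; the trainer presumably averages over more history, so treat this as an approximation only:

```python
from datetime import timedelta

# epoch-41 wall time, summed from the results line: [train] + [valid] + [att_plot]
epoch = (timedelta(hours=4, minutes=48, seconds=7.1)
         + timedelta(minutes=8, seconds=29.09)
         + timedelta(minutes=8, seconds=26.85))
remaining = 50 - 41
print(epoch * remaining)   # 1 day, 21:45:27.360000 -- close to the logged estimate
```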
+[gpub002:0/64] 2023-07-13 00:04:00,778 (multiple_iter_factory:32) INFO: Building 1th iter-factory... +[gpub002:0/64] 2023-07-13 00:04:18,719 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-13 00:04:22,403 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-13 00:04:22,403 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5, +[gpub002:0/64] 2023-07-13 00:04:22,409 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-13 00:09:56,081 (trainer:732) INFO: 42epoch:train:801-900batch: iter_time=2.590, forward_time=0.185, loss_ctc=73.051, loss_att=54.592, acc=0.715, loss=60.130, backward_time=1.048, grad_norm=128.053, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.185, optim0_lr0=5.584e-05, train_time=8.279 +[gpub002:0/64] 2023-07-13 00:12:13,867 (trainer:732) INFO: 42epoch:train:901-1000batch: iter_time=1.283e-04, forward_time=0.148, loss_ctc=66.524, loss_att=49.934, acc=0.708, loss=54.911, backward_time=1.035, grad_norm=121.941, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.584e-05, train_time=2.756 +[gpub002:0/64] 2023-07-13 00:14:30,359 (trainer:732) INFO: 42epoch:train:1001-1100batch: iter_time=1.139e-04, forward_time=0.147, loss_ctc=61.394, loss_att=46.990, acc=0.713, loss=51.311, backward_time=1.031, grad_norm=98.338, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.583e-05, train_time=2.730 +[gpub002:0/64] 2023-07-13 00:16:46,091 (trainer:732) INFO: 42epoch:train:1101-1200batch: iter_time=1.241e-04, forward_time=0.146, loss_ctc=68.116, loss_att=48.127, acc=0.735, loss=54.124, backward_time=1.028, grad_norm=118.499, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.582e-05, train_time=2.714 +[gpub002:0/64] 2023-07-13 00:19:03,313 (trainer:732) INFO: 42epoch:train:1201-1300batch: iter_time=1.194e-04, forward_time=0.145, loss_ctc=78.528, loss_att=57.137, acc=0.710, loss=63.554, backward_time=1.036, grad_norm=124.867, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.582e-05, train_time=2.744 +[gpub002:0/64] 2023-07-13 00:21:20,377 (trainer:732) INFO: 42epoch:train:1301-1400batch: iter_time=1.156e-04, forward_time=0.146, loss_ctc=79.593, loss_att=62.972, acc=0.704, loss=67.958, backward_time=1.031, grad_norm=134.092, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.581e-05, train_time=2.741 +[gpub002:0/64] 2023-07-13 00:23:54,097 (trainer:732) INFO: 42epoch:train:1401-1500batch: iter_time=1.091e-04, forward_time=0.145, loss_ctc=74.007, loss_att=54.221, acc=0.718, loss=60.157, backward_time=1.048, grad_norm=127.878, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.580e-05, train_time=3.074 +[gpub002:0/64] 2023-07-13 00:26:09,909 (trainer:732) INFO: 42epoch:train:1501-1600batch: iter_time=1.349e-04, forward_time=0.144, loss_ctc=75.275, loss_att=58.574, acc=0.702, loss=63.584, 
backward_time=1.029, grad_norm=121.251, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.579e-05, train_time=2.716 +[gpub002:0/64] 2023-07-13 00:27:59,966 (multiple_iter_factory:32) INFO: Building 2th iter-factory... +[gpub002:0/64] 2023-07-13 00:28:18,102 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-13 00:28:22,132 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-13 00:28:22,141 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11, +[gpub002:0/64] 2023-07-13 00:28:22,317 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-13 00:35:20,703 (trainer:732) INFO: 42epoch:train:1601-1700batch: iter_time=4.060, forward_time=0.198, loss_ctc=75.398, loss_att=54.018, acc=0.726, loss=60.432, backward_time=1.041, grad_norm=136.654, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.185, optim0_lr0=5.579e-05, train_time=11.016 +[gpub002:0/64] 2023-07-13 00:36:32,130 (trainer:663) WARNING: The grad norm is nan. Skipping updating the model. +[gpub002:0/64] 2023-07-13 00:37:37,330 (trainer:732) INFO: 42epoch:train:1701-1800batch: iter_time=1.140e-04, forward_time=0.145, loss_ctc=64.814, loss_att=48.109, acc=0.703, loss=53.121, backward_time=1.033, grad_norm=135.699, clip=100.000, loss_scale=1.225e+32, optim_step_time=0.182, optim0_lr0=5.578e-05, train_time=2.732 +[gpub002:0/64] 2023-07-13 00:39:53,639 (trainer:732) INFO: 42epoch:train:1801-1900batch: iter_time=1.067e-04, forward_time=0.145, loss_ctc=62.019, loss_att=46.732, acc=0.717, loss=51.318, backward_time=1.032, grad_norm=99.223, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.577e-05, train_time=2.726 +[gpub002:0/64] 2023-07-13 00:42:09,855 (trainer:732) INFO: 42epoch:train:1901-2000batch: iter_time=1.118e-04, forward_time=0.146, loss_ctc=68.823, loss_att=48.017, acc=0.738, loss=54.259, backward_time=1.028, grad_norm=118.420, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.577e-05, train_time=2.723 +[gpub002:0/64] 2023-07-13 00:44:25,673 (trainer:732) INFO: 42epoch:train:2001-2100batch: iter_time=1.124e-04, forward_time=0.146, loss_ctc=81.313, loss_att=58.446, acc=0.706, loss=65.306, backward_time=1.028, grad_norm=134.672, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.576e-05, train_time=2.717 +[gpub002:0/64] 2023-07-13 00:46:41,649 (trainer:732) INFO: 42epoch:train:2101-2200batch: iter_time=1.066e-04, forward_time=0.146, loss_ctc=78.344, loss_att=62.937, acc=0.702, loss=67.559, backward_time=1.030, grad_norm=151.716, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.181, optim0_lr0=5.575e-05, train_time=2.719 +[gpub002:0/64] 2023-07-13 00:48:57,917 (trainer:732) INFO: 42epoch:train:2201-2300batch: iter_time=1.105e-04, forward_time=0.145, loss_ctc=75.719, loss_att=57.616, acc=0.712, loss=63.047, backward_time=1.032, grad_norm=184.811, clip=100.000, loss_scale=8.113e+31, 
optim_step_time=0.181, optim0_lr0=5.575e-05, train_time=2.725 +[gpub002:0/64] 2023-07-13 00:51:28,156 (trainer:732) INFO: 42epoch:train:2301-2400batch: iter_time=1.113e-04, forward_time=0.147, loss_ctc=73.652, loss_att=53.750, acc=0.713, loss=59.720, backward_time=1.049, grad_norm=115.411, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.574e-05, train_time=3.005 +[gpub002:0/64] 2023-07-13 00:53:44,309 (trainer:732) INFO: 42epoch:train:2401-2500batch: iter_time=1.099e-04, forward_time=0.146, loss_ctc=70.966, loss_att=49.355, acc=0.718, loss=55.839, backward_time=1.028, grad_norm=106.942, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.573e-05, train_time=2.723 +[gpub002:0/64] 2023-07-13 00:53:46,681 (multiple_iter_factory:32) INFO: Building 3th iter-factory... +[gpub002:0/64] 2023-07-13 00:54:04,903 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-13 00:54:08,345 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-13 00:54:08,345 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1, +[gpub002:0/64] 2023-07-13 00:54:08,352 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-13 01:01:39,737 (trainer:732) INFO: 42epoch:train:2501-2600batch: iter_time=1.820, forward_time=0.145, loss_ctc=69.815, loss_att=48.599, acc=0.726, loss=54.963, backward_time=1.041, grad_norm=140.834, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.573e-05, train_time=9.508 +[gpub002:0/64] 2023-07-13 01:03:56,212 (trainer:732) INFO: 42epoch:train:2601-2700batch: iter_time=1.162e-04, forward_time=0.147, loss_ctc=67.653, loss_att=52.431, acc=0.712, loss=56.997, backward_time=1.031, grad_norm=109.062, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.572e-05, train_time=2.729 +[gpub002:0/64] 2023-07-13 01:06:11,899 (trainer:732) INFO: 42epoch:train:2701-2800batch: iter_time=1.161e-04, forward_time=0.145, loss_ctc=59.663, loss_att=44.027, acc=0.729, loss=48.718, backward_time=1.028, grad_norm=114.622, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.571e-05, train_time=2.714 +[gpub002:0/64] 2023-07-13 01:08:27,521 (trainer:732) INFO: 42epoch:train:2801-2900batch: iter_time=1.208e-04, forward_time=0.145, loss_ctc=71.051, loss_att=47.872, acc=0.736, loss=54.826, backward_time=1.028, grad_norm=161.477, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.570e-05, train_time=2.712 +[gpub002:0/64] 2023-07-13 01:10:43,390 (trainer:732) INFO: 42epoch:train:2901-3000batch: iter_time=1.227e-04, forward_time=0.147, loss_ctc=80.680, loss_att=59.876, acc=0.698, loss=66.117, backward_time=1.029, grad_norm=125.354, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.570e-05, train_time=2.717 +[gpub002:0/64] 2023-07-13 01:12:59,481 (trainer:732) INFO: 42epoch:train:3001-3100batch: iter_time=1.471e-04, forward_time=0.146, 
loss_ctc=74.273, loss_att=57.896, acc=0.724, loss=62.809, backward_time=1.030, grad_norm=158.704, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.569e-05, train_time=2.722 +[gpub002:0/64] 2023-07-13 01:15:15,668 (trainer:732) INFO: 42epoch:train:3101-3200batch: iter_time=1.247e-04, forward_time=0.146, loss_ctc=78.031, loss_att=59.327, acc=0.704, loss=64.938, backward_time=1.030, grad_norm=153.548, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.568e-05, train_time=2.724 +[gpub002:0/64] 2023-07-13 01:17:36,473 (trainer:732) INFO: 42epoch:train:3201-3300batch: iter_time=1.264e-04, forward_time=0.145, loss_ctc=73.496, loss_att=52.683, acc=0.721, loss=58.927, backward_time=1.028, grad_norm=135.257, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.181, optim0_lr0=5.568e-05, train_time=2.816 +[gpub002:0/64] 2023-07-13 01:18:26,019 (multiple_iter_factory:32) INFO: Building 4th iter-factory... +[gpub002:0/64] 2023-07-13 01:18:44,507 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-13 01:18:47,930 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-13 01:18:47,930 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0, +[gpub002:0/64] 2023-07-13 01:18:47,937 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-13 01:25:11,697 (trainer:732) INFO: 42epoch:train:3301-3400batch: iter_time=1.638, forward_time=0.194, loss_ctc=72.538, loss_att=53.392, acc=0.723, loss=59.136, backward_time=1.050, grad_norm=121.024, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.185, optim0_lr0=5.567e-05, train_time=9.103 +[gpub002:0/64] 2023-07-13 01:27:27,626 (trainer:732) INFO: 42epoch:train:3401-3500batch: iter_time=1.464e-04, forward_time=0.146, loss_ctc=67.316, loss_att=49.889, acc=0.703, loss=55.117, backward_time=1.029, grad_norm=129.513, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.566e-05, train_time=2.720 +[gpub002:0/64] 2023-07-13 01:29:43,626 (trainer:732) INFO: 42epoch:train:3501-3600batch: iter_time=1.305e-04, forward_time=0.146, loss_ctc=61.606, loss_att=46.689, acc=0.718, loss=51.164, backward_time=1.029, grad_norm=108.622, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.183, optim0_lr0=5.566e-05, train_time=2.720 +[gpub002:0/64] 2023-07-13 01:31:59,663 (trainer:732) INFO: 42epoch:train:3601-3700batch: iter_time=1.609e-04, forward_time=0.146, loss_ctc=67.195, loss_att=48.227, acc=0.733, loss=53.917, backward_time=1.029, grad_norm=126.501, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.565e-05, train_time=2.721 +[gpub002:0/64] 2023-07-13 01:34:15,629 (trainer:732) INFO: 42epoch:train:3701-3800batch: iter_time=1.261e-04, forward_time=0.147, loss_ctc=76.534, loss_att=56.304, acc=0.712, loss=62.373, backward_time=1.030, grad_norm=117.496, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.564e-05, train_time=2.719 
+[gpub002:0/64] 2023-07-13 01:36:31,874 (trainer:732) INFO: 42epoch:train:3801-3900batch: iter_time=1.527e-04, forward_time=0.147, loss_ctc=79.688, loss_att=61.818, acc=0.707, loss=67.179, backward_time=1.031, grad_norm=127.586, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.564e-05, train_time=2.725 +[gpub002:0/64] 2023-07-13 01:38:47,365 (trainer:732) INFO: 42epoch:train:3901-4000batch: iter_time=1.292e-04, forward_time=0.146, loss_ctc=73.126, loss_att=53.845, acc=0.710, loss=59.629, backward_time=1.026, grad_norm=113.224, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.563e-05, train_time=2.710 +[gpub002:0/64] 2023-07-13 01:41:03,491 (trainer:732) INFO: 42epoch:train:4001-4100batch: iter_time=1.410e-04, forward_time=0.147, loss_ctc=73.877, loss_att=58.523, acc=0.696, loss=63.129, backward_time=1.030, grad_norm=130.725, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.562e-05, train_time=2.722 +[gpub002:0/64] 2023-07-13 01:42:49,251 (multiple_iter_factory:32) INFO: Building 5th iter-factory... +[gpub002:0/64] 2023-07-13 01:43:07,433 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-13 01:43:10,890 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-13 01:43:10,891 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9, +[gpub002:0/64] 2023-07-13 01:43:10,897 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-13 01:48:33,517 (trainer:732) INFO: 42epoch:train:4101-4200batch: iter_time=2.988, forward_time=0.147, loss_ctc=74.164, loss_att=53.456, acc=0.719, loss=59.668, backward_time=1.039, grad_norm=115.908, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.561e-05, train_time=9.000 +[gpub002:0/64] 2023-07-13 01:50:50,772 (trainer:732) INFO: 42epoch:train:4201-4300batch: iter_time=1.188e-04, forward_time=0.146, loss_ctc=64.361, loss_att=46.601, acc=0.711, loss=51.929, backward_time=1.033, grad_norm=103.878, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.181, optim0_lr0=5.561e-05, train_time=2.745 +[gpub002:0/64] 2023-07-13 01:53:07,508 (trainer:732) INFO: 42epoch:train:4301-4400batch: iter_time=1.087e-04, forward_time=0.147, loss_ctc=61.857, loss_att=46.652, acc=0.722, loss=51.213, backward_time=1.030, grad_norm=101.562, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.560e-05, train_time=2.735 +[gpub002:0/64] 2023-07-13 01:55:23,236 (trainer:732) INFO: 42epoch:train:4401-4500batch: iter_time=1.127e-04, forward_time=0.145, loss_ctc=69.241, loss_att=48.461, acc=0.741, loss=54.695, backward_time=1.028, grad_norm=128.802, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.559e-05, train_time=2.714 +[gpub002:0/64] 2023-07-13 01:57:42,177 (trainer:732) INFO: 42epoch:train:4501-4600batch: iter_time=1.137e-04, forward_time=0.146, loss_ctc=80.204, loss_att=58.270, acc=0.708, loss=64.850, 
backward_time=1.034, grad_norm=105.180, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.559e-05, train_time=2.779 +[gpub002:0/64] 2023-07-13 01:59:58,237 (trainer:732) INFO: 42epoch:train:4601-4700batch: iter_time=1.188e-04, forward_time=0.146, loss_ctc=76.178, loss_att=61.031, acc=0.708, loss=65.575, backward_time=1.030, grad_norm=119.146, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.558e-05, train_time=2.721 +[gpub002:0/64] 2023-07-13 02:02:14,580 (trainer:732) INFO: 42epoch:train:4701-4800batch: iter_time=1.163e-04, forward_time=0.147, loss_ctc=75.844, loss_att=58.430, acc=0.715, loss=63.654, backward_time=1.034, grad_norm=126.411, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.557e-05, train_time=2.727 +[gpub002:0/64] 2023-07-13 02:04:30,380 (trainer:732) INFO: 42epoch:train:4801-4900batch: iter_time=1.091e-04, forward_time=0.146, loss_ctc=73.955, loss_att=54.425, acc=0.710, loss=60.284, backward_time=1.028, grad_norm=110.702, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.557e-05, train_time=2.716 +[gpub002:0/64] 2023-07-13 02:06:46,081 (trainer:732) INFO: 42epoch:train:4901-5000batch: iter_time=1.107e-04, forward_time=0.146, loss_ctc=71.515, loss_att=48.756, acc=0.724, loss=55.584, backward_time=1.029, grad_norm=124.882, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.556e-05, train_time=2.714 +[gpub002:0/64] 2023-07-13 02:06:47,710 (multiple_iter_factory:32) INFO: Building 6th iter-factory... +[gpub002:0/64] 2023-07-13 02:07:06,009 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-13 02:07:09,425 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-13 02:07:09,425 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8, +[gpub002:0/64] 2023-07-13 02:07:09,432 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-13 02:14:16,140 (trainer:732) INFO: 42epoch:train:5001-5100batch: iter_time=1.636, forward_time=0.145, loss_ctc=69.401, loss_att=50.532, acc=0.709, loss=56.193, backward_time=1.040, grad_norm=142.315, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.181, optim0_lr0=5.555e-05, train_time=9.001 +[gpub002:0/64] 2023-07-13 02:16:32,200 (trainer:732) INFO: 42epoch:train:5101-5200batch: iter_time=1.226e-04, forward_time=0.145, loss_ctc=65.757, loss_att=49.932, acc=0.709, loss=54.679, backward_time=1.028, grad_norm=114.490, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.181, optim0_lr0=5.555e-05, train_time=2.721 +[gpub002:0/64] 2023-07-13 02:18:52,290 (trainer:732) INFO: 42epoch:train:5201-5300batch: iter_time=1.255e-04, forward_time=0.144, loss_ctc=64.010, loss_att=44.270, acc=0.738, loss=50.192, backward_time=1.032, grad_norm=105.407, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.181, optim0_lr0=5.554e-05, train_time=2.802 +[gpub002:0/64] 2023-07-13 02:21:08,766 (trainer:732) INFO: 
42epoch:train:5301-5400batch: iter_time=1.153e-04, forward_time=0.147, loss_ctc=70.481, loss_att=53.258, acc=0.712, loss=58.425, backward_time=1.033, grad_norm=117.124, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.553e-05, train_time=2.729 +[gpub002:0/64] 2023-07-13 02:23:24,679 (trainer:732) INFO: 42epoch:train:5401-5500batch: iter_time=1.204e-04, forward_time=0.146, loss_ctc=79.697, loss_att=60.747, acc=0.706, loss=66.432, backward_time=1.030, grad_norm=129.480, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.553e-05, train_time=2.718 +[gpub002:0/64] 2023-07-13 02:25:41,523 (trainer:732) INFO: 42epoch:train:5501-5600batch: iter_time=1.228e-04, forward_time=0.149, loss_ctc=72.910, loss_att=55.688, acc=0.712, loss=60.854, backward_time=1.030, grad_norm=149.499, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.186, optim0_lr0=5.552e-05, train_time=2.737 +[gpub002:0/64] 2023-07-13 02:27:57,260 (trainer:732) INFO: 42epoch:train:5601-5700batch: iter_time=1.284e-04, forward_time=0.145, loss_ctc=77.018, loss_att=59.105, acc=0.700, loss=64.479, backward_time=1.028, grad_norm=131.998, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.183, optim0_lr0=5.551e-05, train_time=2.714 +[gpub002:0/64] 2023-07-13 02:30:13,284 (trainer:732) INFO: 42epoch:train:5701-5800batch: iter_time=1.144e-04, forward_time=0.145, loss_ctc=73.572, loss_att=53.577, acc=0.722, loss=59.576, backward_time=1.030, grad_norm=103.209, clip=100.000, loss_scale=1.201e+32, optim_step_time=0.182, optim0_lr0=5.551e-05, train_time=2.720 +[gpub002:0/64] 2023-07-13 02:31:01,533 (multiple_iter_factory:32) INFO: Building 7th iter-factory... +[gpub002:0/64] 2023-07-13 02:31:19,805 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-13 02:31:23,527 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-13 02:31:23,527 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10, +[gpub002:0/64] 2023-07-13 02:31:23,533 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-13 02:38:09,529 (trainer:732) INFO: 42epoch:train:5801-5900batch: iter_time=1.672, forward_time=0.223, loss_ctc=66.753, loss_att=45.931, acc=0.710, loss=52.177, backward_time=1.044, grad_norm=115.552, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.185, optim0_lr0=5.550e-05, train_time=9.524 +[gpub002:0/64] 2023-07-13 02:40:27,216 (trainer:732) INFO: 42epoch:train:5901-6000batch: iter_time=1.296e-04, forward_time=0.146, loss_ctc=66.955, loss_att=51.180, acc=0.714, loss=55.913, backward_time=1.032, grad_norm=103.524, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.549e-05, train_time=2.754 +[gpub002:0/64] 2023-07-13 02:42:42,739 (trainer:732) INFO: 42epoch:train:6001-6100batch: iter_time=1.289e-04, forward_time=0.144, loss_ctc=62.586, loss_att=45.365, acc=0.728, loss=50.531, backward_time=1.026, grad_norm=94.871, clip=100.000, 
loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.548e-05, train_time=2.710 +[gpub002:0/64] 2023-07-13 02:44:58,673 (trainer:732) INFO: 42epoch:train:6101-6200batch: iter_time=1.208e-04, forward_time=0.145, loss_ctc=70.502, loss_att=52.029, acc=0.726, loss=57.571, backward_time=1.029, grad_norm=120.129, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.548e-05, train_time=2.718 +[gpub002:0/64] 2023-07-13 02:47:14,390 (trainer:732) INFO: 42epoch:train:6201-6300batch: iter_time=1.266e-04, forward_time=0.144, loss_ctc=80.294, loss_att=60.866, acc=0.691, loss=66.694, backward_time=1.027, grad_norm=141.400, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.547e-05, train_time=2.714 +[gpub002:0/64] 2023-07-13 02:49:29,945 (trainer:732) INFO: 42epoch:train:6301-6400batch: iter_time=1.154e-04, forward_time=0.144, loss_ctc=74.026, loss_att=55.778, acc=0.718, loss=61.252, backward_time=1.026, grad_norm=131.830, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.546e-05, train_time=2.711 +[gpub002:0/64] 2023-07-13 02:51:45,921 (trainer:732) INFO: 42epoch:train:6401-6500batch: iter_time=1.322e-04, forward_time=0.145, loss_ctc=77.487, loss_att=57.948, acc=0.707, loss=63.810, backward_time=1.028, grad_norm=135.680, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.546e-05, train_time=2.719 +[gpub002:0/64] 2023-07-13 02:54:01,435 (trainer:732) INFO: 42epoch:train:6501-6600batch: iter_time=1.228e-04, forward_time=0.144, loss_ctc=69.023, loss_att=50.317, acc=0.715, loss=55.929, backward_time=1.026, grad_norm=121.503, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.545e-05, train_time=2.710 +[gpub002:0/64] 2023-07-13 02:55:35,752 (multiple_iter_factory:32) INFO: Building 8th iter-factory... 
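The "Building Nth iter-factory..." messages walk the 12 splits in a different order each epoch (epoch 41 visits split.7, 8, 9, 11, 10, 6, 2, 5; epoch 42 starts over with split.6, 5, 11, 1, 0, 9, 8, 10, 3, ...), i.e. one dataset-plus-sampler factory is built lazily per split from a per-epoch shuffled list. A minimal sketch of that pattern, assuming an epoch-seeded shuffle (espnet2's MultipleIterFactory differs in detail):

```python
import random

def build_iter_factories(num_splits=12, epoch=42):
    order = list(range(num_splits))
    random.Random(epoch).shuffle(order)   # deterministic per-epoch order
    for n, split in enumerate(order):
        # the real trainer builds an ESPnetDataset + UnsortedBatchSampler
        # over exp/s2t_stats_raw_bpe50000/splits12/*/split.{split} here
        print(f"Building {n}th iter-factory... (split.{split})")
        yield split

for _ in build_iter_factories():
    pass
```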
+[gpub002:0/64] 2023-07-13 02:55:53,825 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub002:0/64] 2023-07-13 02:55:57,465 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"}
+  preprocess: )
+[gpub002:0/64] 2023-07-13 02:55:57,465 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3,
+[gpub002:0/64] 2023-07-13 02:55:57,472 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub002:0/64] 2023-07-13 03:02:27,111 (trainer:732) INFO: 42epoch:train:6601-6700batch: iter_time=3.598, forward_time=0.208, loss_ctc=68.057, loss_att=47.036, acc=0.718, loss=53.342, backward_time=1.042, grad_norm=118.857, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.184, optim0_lr0=5.544e-05, train_time=10.113
+[gpub002:0/64] 2023-07-13 03:04:44,821 (trainer:732) INFO: 42epoch:train:6701-6800batch: iter_time=1.149e-04, forward_time=0.148, loss_ctc=68.438, loss_att=52.085, acc=0.723, loss=56.991, backward_time=1.034, grad_norm=140.459, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.544e-05, train_time=2.754
+[gpub002:0/64] 2023-07-13 03:07:01,317 (trainer:732) INFO: 42epoch:train:6801-6900batch: iter_time=1.279e-04, forward_time=0.145, loss_ctc=64.735, loss_att=49.425, acc=0.714, loss=54.018, backward_time=1.027, grad_norm=114.945, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.543e-05, train_time=2.730
+[gpub002:0/64] 2023-07-13 03:09:19,001 (trainer:732) INFO: 42epoch:train:6901-7000batch: iter_time=0.004, forward_time=0.147, loss_ctc=64.172, loss_att=44.513, acc=0.739, loss=50.411, backward_time=1.035, grad_norm=108.401, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.542e-05, train_time=2.753
+[gpub002:0/64] 2023-07-13 03:11:34,913 (trainer:732) INFO: 42epoch:train:7001-7100batch: iter_time=0.002, forward_time=0.145, loss_ctc=70.024, loss_att=52.180, acc=0.717, loss=57.534, backward_time=1.028, grad_norm=133.620, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.542e-05, train_time=2.718
+[gpub002:0/64] 2023-07-13 03:14:01,730 (trainer:732) INFO: 42epoch:train:7101-7200batch: iter_time=1.283e-04, forward_time=0.217, loss_ctc=80.096, loss_att=59.082, acc=0.709, loss=65.386, backward_time=1.048, grad_norm=121.033, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.188, optim0_lr0=5.541e-05, train_time=2.936
+[gpub002:0/64] 2023-07-13 03:16:18,403 (trainer:732) INFO: 42epoch:train:7201-7300batch: iter_time=1.249e-04, forward_time=0.146, loss_ctc=72.737, loss_att=55.530, acc=0.720, loss=60.692, backward_time=1.031, grad_norm=126.104, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.540e-05, train_time=2.733
+[gpub002:0/64] 2023-07-13 03:18:34,604 (trainer:732) INFO: 42epoch:train:7301-7400batch: iter_time=1.150e-04, forward_time=0.146, loss_ctc=76.887, loss_att=59.261, acc=0.713, loss=64.549, backward_time=1.028, grad_norm=153.123, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.540e-05, train_time=2.724
+[gpub002:0/64] 2023-07-13 03:20:50,723 (trainer:732) INFO: 42epoch:train:7401-7500batch: iter_time=1.391e-04, forward_time=0.147, loss_ctc=74.305, loss_att=53.789, acc=0.728, loss=59.944, backward_time=1.028, grad_norm=120.151, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.539e-05, train_time=2.722
+[gpub002:0/64] 2023-07-13 03:21:11,975 (multiple_iter_factory:32) INFO: Building 9th iter-factory...
+[gpub002:0/64] 2023-07-13 03:21:30,453 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub002:0/64] 2023-07-13 03:21:33,941 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"}
+  preprocess: )
+[gpub002:0/64] 2023-07-13 03:21:33,941 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2,
+[gpub002:0/64] 2023-07-13 03:21:33,947 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub002:0/64] 2023-07-13 03:28:50,459 (trainer:732) INFO: 42epoch:train:7501-7600batch: iter_time=3.260, forward_time=0.146, loss_ctc=68.511, loss_att=47.602, acc=0.728, loss=53.875, backward_time=1.046, grad_norm=119.300, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.538e-05, train_time=9.595
+[gpub002:0/64] 2023-07-13 03:31:08,080 (trainer:732) INFO: 42epoch:train:7601-7700batch: iter_time=1.319e-04, forward_time=0.145, loss_ctc=65.544, loss_att=50.965, acc=0.711, loss=55.339, backward_time=1.032, grad_norm=117.013, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.538e-05, train_time=2.752
+[gpub002:0/64] 2023-07-13 03:33:24,531 (trainer:732) INFO: 42epoch:train:7701-7800batch: iter_time=1.504e-04, forward_time=0.147, loss_ctc=59.954, loss_att=44.149, acc=0.726, loss=48.890, backward_time=1.030, grad_norm=111.944, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.183, optim0_lr0=5.537e-05, train_time=2.729
+[gpub002:0/64] 2023-07-13 03:35:39,885 (trainer:732) INFO: 42epoch:train:7801-7900batch: iter_time=1.352e-04, forward_time=0.144, loss_ctc=71.306, loss_att=50.202, acc=0.729, loss=56.534, backward_time=1.025, grad_norm=132.190, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.536e-05, train_time=2.707
+[gpub002:0/64] 2023-07-13 03:38:11,937 (trainer:732) INFO: 42epoch:train:7901-8000batch: iter_time=1.097e-04, forward_time=0.144, loss_ctc=81.280, loss_att=59.731, acc=0.695, loss=66.195, backward_time=1.044, grad_norm=116.322, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.536e-05, train_time=3.041
+[gpub002:0/64] 2023-07-13 03:40:27,681 (trainer:732) INFO: 42epoch:train:8001-8100batch: iter_time=1.301e-04, forward_time=0.144, loss_ctc=73.657, loss_att=57.378, acc=0.722, loss=62.262, backward_time=1.028, grad_norm=119.137, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.183, optim0_lr0=5.535e-05, train_time=2.715
+[gpub002:0/64] 2023-07-13 03:42:43,340 (trainer:732) INFO: 42epoch:train:8101-8200batch: iter_time=1.130e-04, forward_time=0.145, loss_ctc=77.258, loss_att=59.529, acc=0.697, loss=64.848, backward_time=1.029, grad_norm=124.385, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.183, optim0_lr0=5.534e-05, train_time=2.713
+[gpub002:0/64] 2023-07-13 03:44:58,939 (trainer:732) INFO: 42epoch:train:8201-8300batch: iter_time=1.061e-04, forward_time=0.145, loss_ctc=70.039, loss_att=50.529, acc=0.722, loss=56.382, backward_time=1.028, grad_norm=114.947, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.183, optim0_lr0=5.533e-05, train_time=2.712
+[gpub002:0/64] 2023-07-13 03:45:46,940 (multiple_iter_factory:32) INFO: Building 10th iter-factory...
+[gpub002:0/64] 2023-07-13 03:46:05,424 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub002:0/64] 2023-07-13 03:46:08,900 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"}
+  preprocess: )
+[gpub002:0/64] 2023-07-13 03:46:08,900 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4,
+[gpub002:0/64] 2023-07-13 03:46:08,906 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub002:0/64] 2023-07-13 03:53:07,279 (trainer:732) INFO: 42epoch:train:8301-8400batch: iter_time=1.680, forward_time=0.177, loss_ctc=70.901, loss_att=52.528, acc=0.721, loss=58.040, backward_time=1.041, grad_norm=127.142, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.183, optim0_lr0=5.533e-05, train_time=9.767
+[gpub002:0/64] 2023-07-13 03:55:25,674 (trainer:732) INFO: 42epoch:train:8401-8500batch: iter_time=1.138e-04, forward_time=0.145, loss_ctc=63.924, loss_att=47.108, acc=0.710, loss=52.153, backward_time=1.037, grad_norm=118.179, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.532e-05, train_time=2.768
+[gpub002:0/64] 2023-07-13 03:57:42,100 (trainer:732) INFO: 42epoch:train:8501-8600batch: iter_time=1.247e-04, forward_time=0.145, loss_ctc=63.131, loss_att=46.587, acc=0.723, loss=51.551, backward_time=1.029, grad_norm=92.654, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.531e-05, train_time=2.728
+[gpub002:0/64] 2023-07-13 04:00:13,714 (trainer:732) INFO: 42epoch:train:8601-8700batch: iter_time=1.103e-04, forward_time=0.144, loss_ctc=68.642, loss_att=48.513, acc=0.737, loss=54.552, backward_time=1.043, grad_norm=114.432, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.531e-05, train_time=3.032
+[gpub002:0/64] 2023-07-13 04:02:29,944 (trainer:732) INFO: 42epoch:train:8701-8800batch: iter_time=1.279e-04, forward_time=0.144, loss_ctc=75.843, loss_att=56.720, acc=0.713, loss=62.457, backward_time=1.028, grad_norm=108.012, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.530e-05, train_time=2.724
+[gpub002:0/64] 2023-07-13 04:04:45,889 (trainer:732) INFO: 42epoch:train:8801-8900batch: iter_time=1.261e-04, forward_time=0.145, loss_ctc=77.727, loss_att=61.456, acc=0.711, loss=66.338, backward_time=1.029, grad_norm=118.766, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.529e-05, train_time=2.719
+[gpub002:0/64] 2023-07-13 04:07:01,510 (trainer:732) INFO: 42epoch:train:8901-9000batch: iter_time=1.245e-04, forward_time=0.144, loss_ctc=72.948, loss_att=53.187, acc=0.715, loss=59.115, backward_time=1.027, grad_norm=137.792, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.529e-05, train_time=2.712
+[gpub002:0/64] 2023-07-13 04:09:17,733 (trainer:732) INFO: 42epoch:train:9001-9100batch: iter_time=1.255e-04, forward_time=0.145, loss_ctc=75.533, loss_att=57.845, acc=0.699, loss=63.151, backward_time=1.031, grad_norm=119.867, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.528e-05, train_time=2.725
+[gpub002:0/64] 2023-07-13 04:10:50,524 (multiple_iter_factory:32) INFO: Building 11th iter-factory...
+[gpub002:0/64] 2023-07-13 04:11:08,648 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub002:0/64] 2023-07-13 04:11:12,048 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"}
+  preprocess: )
+[gpub002:0/64] 2023-07-13 04:11:12,048 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7,
+[gpub002:0/64] 2023-07-13 04:11:12,055 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub002:0/64] 2023-07-13 04:16:46,199 (trainer:732) INFO: 42epoch:train:9101-9200batch: iter_time=1.637, forward_time=0.145, loss_ctc=71.833, loss_att=48.627, acc=0.732, loss=55.589, backward_time=1.042, grad_norm=104.157, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.527e-05, train_time=8.969
+[gpub002:0/64] 2023-07-13 04:19:03,487 (trainer:732) INFO: 42epoch:train:9201-9300batch: iter_time=1.282e-04, forward_time=0.146, loss_ctc=67.367, loss_att=51.468, acc=0.722, loss=56.238, backward_time=1.032, grad_norm=120.743, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.527e-05, train_time=2.746
+[gpub002:0/64] 2023-07-13 04:21:20,258 (trainer:732) INFO: 42epoch:train:9301-9400batch: iter_time=1.226e-04, forward_time=0.145, loss_ctc=65.408, loss_att=50.219, acc=0.717, loss=54.776, backward_time=1.030, grad_norm=110.375, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.526e-05, train_time=2.735
+[gpub002:0/64] 2023-07-13 04:23:36,660 (trainer:732) INFO: 42epoch:train:9401-9500batch: iter_time=1.233e-04, forward_time=0.144, loss_ctc=63.769, loss_att=43.754, acc=0.739, loss=49.759, backward_time=1.025, grad_norm=107.213, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.525e-05, train_time=2.728
+[gpub002:0/64] 2023-07-13 04:25:52,274 (trainer:732) INFO: 42epoch:train:9501-9600batch: iter_time=1.125e-04, forward_time=0.144, loss_ctc=70.289, loss_att=52.121, acc=0.718, loss=57.571, backward_time=1.025, grad_norm=122.835, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.525e-05, train_time=2.712
+[gpub002:0/64] 2023-07-13 04:28:08,506 (trainer:732) INFO: 42epoch:train:9601-9700batch: iter_time=1.172e-04, forward_time=0.145, loss_ctc=81.429, loss_att=62.500, acc=0.706, loss=68.178, backward_time=1.028, grad_norm=125.597, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.524e-05, train_time=2.724
+[gpub002:0/64] 2023-07-13 04:30:24,543 (trainer:732) INFO: 42epoch:train:9701-9800batch: iter_time=1.224e-04, forward_time=0.145, loss_ctc=71.177, loss_att=53.266, acc=0.725, loss=58.639, backward_time=1.027, grad_norm=126.374, clip=100.000, loss_scale=2.401e+32, optim_step_time=0.181, optim0_lr0=5.523e-05, train_time=2.721
+[gpub002:0/64] 2023-07-13 04:32:40,550 (trainer:732) INFO: 42epoch:train:9801-9900batch: iter_time=1.218e-04, forward_time=0.145, loss_ctc=75.760, loss_att=57.853, acc=0.719, loss=63.225, backward_time=1.030, grad_norm=129.317, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.523e-05, train_time=2.720
+[gpub002:0/64] 2023-07-13 04:34:56,397 (trainer:732) INFO: 42epoch:train:9901-10000batch: iter_time=1.167e-04, forward_time=0.146, loss_ctc=73.059, loss_att=52.842, acc=0.731, loss=58.907, backward_time=1.028, grad_norm=109.600, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.522e-05, train_time=2.717
+[gpub002:0/64] 2023-07-13 04:50:54,355 (trainer:338) INFO: 42epoch results: [train] iter_time=0.354, forward_time=0.196, loss_ctc=71.674, loss_att=53.129, acc=0.716, loss=58.693, backward_time=1.042, grad_norm=122.970, clip=100.000, loss_scale=1.303e+32, optim_step_time=0.185, optim0_lr0=5.556e-05, train_time=3.797, time=5 hours, 16 minutes and 39.53 seconds, total_count=390000, gpu_max_cached_mem_GB=37.572, [valid] loss_ctc=44.090, cer_ctc=0.260, loss_att=36.484, acc=0.697, cer=0.342, wer=0.989, loss=38.766, time=6 minutes and 47.34 seconds, total_count=39974, gpu_max_cached_mem_GB=37.572, [att_plot] time=8 minutes and 56.53 seconds, total_count=0, gpu_max_cached_mem_GB=37.572
+[gpub002:0/64] 2023-07-13 04:51:13,901 (trainer:386) INFO: The best model has been updated: valid.acc, valid.total_count
+[gpub002:0/64] 2023-07-13 04:51:13,945 (trainer:440) INFO: The model files were removed: exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/31epoch.pth
+[gpub002:0/64] 2023-07-13 04:51:14,020 (trainer:272) INFO: 43/50epoch started. Estimated time to finish: 1 day, 17 hours and 23 minutes
+[gpub002:0/64] 2023-07-13 04:51:15,288 (multiple_iter_factory:32) INFO: Building 0th iter-factory...
+[gpub002:0/64] 2023-07-13 04:51:35,527 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub002:0/64] 2023-07-13 04:51:39,096 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"}
+  preprocess: )
+[gpub002:0/64] 2023-07-13 04:51:39,097 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0,
+[gpub002:0/64] 2023-07-13 04:51:39,159 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub002:0/64] 2023-07-13 04:58:53,737 (trainer:732) INFO: 43epoch:train:1-100batch: iter_time=3.165, forward_time=0.177, loss_ctc=67.171, loss_att=52.136, acc=0.696, loss=56.646, backward_time=1.043, grad_norm=125.404, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.185, optim0_lr0=5.521e-05, train_time=9.181
+[gpub002:0/64] 2023-07-13 05:01:09,947 (trainer:732) INFO: 43epoch:train:101-200batch: iter_time=1.246e-04, forward_time=0.146, loss_ctc=74.299, loss_att=53.346, acc=0.702, loss=59.632, backward_time=1.031, grad_norm=143.592, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.521e-05, train_time=2.724
+[gpub002:0/64] 2023-07-13 05:03:26,486 (trainer:732) INFO: 43epoch:train:201-300batch: iter_time=1.129e-04, forward_time=0.146, loss_ctc=72.274, loss_att=51.180, acc=0.712, loss=57.508, backward_time=1.030, grad_norm=127.012, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.520e-05, train_time=2.731
+[gpub002:0/64] 2023-07-13 05:05:44,653 (trainer:732) INFO: 43epoch:train:301-400batch: iter_time=1.104e-04, forward_time=0.149, loss_ctc=74.766, loss_att=54.066, acc=0.696, loss=60.276, backward_time=1.031, grad_norm=130.116, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.519e-05, train_time=2.763
+[gpub002:0/64] 2023-07-13 05:08:01,530 (trainer:732) INFO: 43epoch:train:401-500batch: iter_time=1.201e-04, forward_time=0.146, loss_ctc=69.776, loss_att=53.497, acc=0.706, loss=58.381, backward_time=1.029, grad_norm=113.600, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.519e-05, train_time=2.737
+[gpub002:0/64] 2023-07-13 05:10:24,010 (trainer:732) INFO: 43epoch:train:501-600batch: iter_time=3.155e-04, forward_time=0.146, loss_ctc=78.761, loss_att=55.515, acc=0.699, loss=62.489, backward_time=1.032, grad_norm=125.079, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.518e-05, train_time=2.849
+[gpub002:0/64] 2023-07-13 05:12:54,234 (trainer:732) INFO: 43epoch:train:601-700batch: iter_time=1.062e-04, forward_time=0.144, loss_ctc=71.370, loss_att=43.750, acc=0.721, loss=52.036, backward_time=1.053, grad_norm=126.006, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.517e-05, train_time=3.004
+[gpub002:0/64] 2023-07-13 05:15:26,303 (trainer:732) INFO: 43epoch:train:701-800batch: iter_time=5.144e-04, forward_time=0.179, loss_ctc=66.772, loss_att=49.511, acc=0.712, loss=54.690, backward_time=1.041, grad_norm=121.595, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.517e-05, train_time=3.041
+[gpub002:0/64] 2023-07-13 05:16:19,570 (multiple_iter_factory:32) INFO: Building 1th iter-factory...
+[gpub002:0/64] 2023-07-13 05:16:37,303 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub002:0/64] 2023-07-13 05:16:40,677 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"}
+  preprocess: )
+[gpub002:0/64] 2023-07-13 05:16:40,677 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7,
+[gpub002:0/64] 2023-07-13 05:16:40,684 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub002:0/64] 2023-07-13 05:23:58,859 (trainer:732) INFO: 43epoch:train:801-900batch: iter_time=1.808, forward_time=0.186, loss_ctc=68.942, loss_att=51.380, acc=0.705, loss=56.649, backward_time=1.041, grad_norm=122.084, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.516e-05, train_time=10.251
+[gpub002:0/64] 2023-07-13 05:26:15,903 (trainer:732) INFO: 43epoch:train:901-1000batch: iter_time=1.277e-04, forward_time=0.148, loss_ctc=67.154, loss_att=51.145, acc=0.718, loss=55.947, backward_time=1.034, grad_norm=100.494, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.515e-05, train_time=2.741
+[gpub002:0/64] 2023-07-13 05:28:31,874 (trainer:732) INFO: 43epoch:train:1001-1100batch: iter_time=1.304e-04, forward_time=0.145, loss_ctc=74.706, loss_att=53.692, acc=0.714, loss=59.997, backward_time=1.028, grad_norm=134.309, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.515e-05, train_time=2.719
+[gpub002:0/64] 2023-07-13 05:30:50,785 (trainer:732) INFO: 43epoch:train:1101-1200batch: iter_time=1.143e-04, forward_time=0.145, loss_ctc=70.458, loss_att=49.738, acc=0.704, loss=55.954, backward_time=1.031, grad_norm=126.190, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.514e-05, train_time=2.778
+[gpub002:0/64] 2023-07-13 05:33:07,145 (trainer:732) INFO: 43epoch:train:1201-1300batch: iter_time=1.270e-04, forward_time=0.146, loss_ctc=76.640, loss_att=58.441, acc=0.723, loss=63.900, backward_time=1.030, grad_norm=120.803, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.513e-05, train_time=2.727
+[gpub002:0/64] 2023-07-13 05:35:42,951 (trainer:732) INFO: 43epoch:train:1301-1400batch: iter_time=0.003, forward_time=0.281, loss_ctc=63.792, loss_att=48.848, acc=0.704, loss=53.331, backward_time=1.064, grad_norm=120.871, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.193, optim0_lr0=5.513e-05, train_time=3.115
+[gpub002:0/64] 2023-07-13 05:37:59,663 (trainer:732) INFO: 43epoch:train:1401-1500batch: iter_time=1.387e-04, forward_time=0.147, loss_ctc=73.107, loss_att=46.764, acc=0.723, loss=54.667, backward_time=1.030, grad_norm=135.144, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.512e-05, train_time=2.735
+[gpub002:0/64] 2023-07-13 05:40:17,904 (trainer:732) INFO: 43epoch:train:1501-1600batch: iter_time=1.201e-04, forward_time=0.147, loss_ctc=66.824, loss_att=47.833, acc=0.731, loss=53.530, backward_time=1.030, grad_norm=106.942, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.511e-05, train_time=2.765
+[gpub002:0/64] 2023-07-13 05:41:55,175 (multiple_iter_factory:32) INFO: Building 2th iter-factory...
+[gpub002:0/64] 2023-07-13 05:42:13,121 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub002:0/64] 2023-07-13 05:42:16,546 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"}
+  preprocess: )
+[gpub002:0/64] 2023-07-13 05:42:16,546 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1,
+[gpub002:0/64] 2023-07-13 05:42:16,552 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub002:0/64] 2023-07-13 05:46:42,778 (trainer:732) INFO: 43epoch:train:1601-1700batch: iter_time=2.035, forward_time=0.145, loss_ctc=64.829, loss_att=45.468, acc=0.725, loss=51.276, backward_time=1.045, grad_norm=109.907, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.511e-05, train_time=7.697
+[gpub002:0/64] 2023-07-13 05:49:04,557 (trainer:732) INFO: 43epoch:train:1701-1800batch: iter_time=1.137e-04, forward_time=0.146, loss_ctc=68.037, loss_att=51.317, acc=0.717, loss=56.333, backward_time=1.039, grad_norm=110.998, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.510e-05, train_time=2.835
+[gpub002:0/64] 2023-07-13 05:51:20,869 (trainer:732) INFO: 43epoch:train:1801-1900batch: iter_time=1.132e-04, forward_time=0.147, loss_ctc=78.940, loss_att=57.649, acc=0.709, loss=64.036, backward_time=1.028, grad_norm=127.683, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.509e-05, train_time=2.726
+[gpub002:0/64] 2023-07-13 05:53:36,717 (trainer:732) INFO: 43epoch:train:1901-2000batch: iter_time=1.120e-04, forward_time=0.146, loss_ctc=62.792, loss_att=44.461, acc=0.716, loss=49.960, backward_time=1.025, grad_norm=98.542, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.509e-05, train_time=2.717
+[gpub002:0/64] 2023-07-13 05:55:52,706 (trainer:732) INFO: 43epoch:train:2001-2100batch: iter_time=1.126e-04, forward_time=0.146, loss_ctc=81.040, loss_att=61.512, acc=0.713, loss=67.371, backward_time=1.029, grad_norm=130.485, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.508e-05, train_time=2.720
+[gpub002:0/64] 2023-07-13 05:58:11,248 (trainer:732) INFO: 43epoch:train:2101-2200batch: iter_time=1.188e-04, forward_time=0.146, loss_ctc=64.820, loss_att=50.416, acc=0.711, loss=54.738, backward_time=1.030, grad_norm=111.591, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.507e-05, train_time=2.771
+[gpub002:0/64] 2023-07-13 06:00:26,904 (trainer:732) INFO: 43epoch:train:2201-2300batch: iter_time=1.170e-04, forward_time=0.147, loss_ctc=69.248, loss_att=47.166, acc=0.714, loss=53.791, backward_time=1.026, grad_norm=117.367, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.507e-05, train_time=2.713
+[gpub002:0/64] 2023-07-13 06:02:46,061 (trainer:732) INFO: 43epoch:train:2301-2400batch: iter_time=1.175e-04, forward_time=0.146, loss_ctc=65.678, loss_att=43.020, acc=0.730, loss=49.818, backward_time=1.039, grad_norm=117.099, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.506e-05, train_time=2.783
+[gpub002:0/64] 2023-07-13 06:05:02,779 (multiple_iter_factory:32) INFO: Building 3th iter-factory...
+[gpub002:0/64] 2023-07-13 06:05:21,017 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub002:0/64] 2023-07-13 06:05:24,530 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"}
+  preprocess: )
+[gpub002:0/64] 2023-07-13 06:05:24,530 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2,
+[gpub002:0/64] 2023-07-13 06:05:24,536 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub002:0/64] 2023-07-13 06:11:05,880 (trainer:732) INFO: 43epoch:train:2401-2500batch: iter_time=1.277, forward_time=0.147, loss_ctc=70.028, loss_att=50.589, acc=0.734, loss=56.420, backward_time=1.041, grad_norm=117.428, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.505e-05, train_time=9.996
+[gpub002:0/64] 2023-07-13 06:13:45,256 (trainer:732) INFO: 43epoch:train:2501-2600batch: iter_time=1.182e-04, forward_time=0.146, loss_ctc=65.700, loss_att=49.932, acc=0.707, loss=54.662, backward_time=1.045, grad_norm=108.564, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.505e-05, train_time=3.187
+[gpub002:0/64] 2023-07-13 06:16:01,175 (trainer:732) INFO: 43epoch:train:2601-2700batch: iter_time=1.210e-04, forward_time=0.145, loss_ctc=69.006, loss_att=51.406, acc=0.708, loss=56.686, backward_time=1.029, grad_norm=117.685, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.504e-05, train_time=2.718
+[gpub002:0/64] 2023-07-13 06:18:16,928 (trainer:732) INFO: 43epoch:train:2701-2800batch: iter_time=1.219e-04, forward_time=0.145, loss_ctc=68.756, loss_att=49.034, acc=0.720, loss=54.951, backward_time=1.026, grad_norm=113.801, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.503e-05, train_time=2.715
+[gpub002:0/64] 2023-07-13 06:20:32,341 (trainer:732) INFO: 43epoch:train:2801-2900batch: iter_time=1.224e-04, forward_time=0.145, loss_ctc=74.163, loss_att=54.309, acc=0.702, loss=60.265, backward_time=1.024, grad_norm=120.888, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.503e-05, train_time=2.708
+[gpub002:0/64] 2023-07-13 06:22:47,910 (trainer:732) INFO: 43epoch:train:2901-3000batch: iter_time=1.300e-04, forward_time=0.145, loss_ctc=68.902, loss_att=52.706, acc=0.711, loss=57.565, backward_time=1.027, grad_norm=117.962, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.502e-05, train_time=2.711
+[gpub002:0/64] 2023-07-13 06:25:04,075 (trainer:732) INFO: 43epoch:train:3001-3100batch: iter_time=1.264e-04, forward_time=0.148, loss_ctc=72.330, loss_att=53.634, acc=0.706, loss=59.243, backward_time=1.029, grad_norm=126.462, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.501e-05, train_time=2.723
+[gpub002:0/64] 2023-07-13 06:27:19,562 (trainer:732) INFO: 43epoch:train:3101-3200batch: iter_time=1.278e-04, forward_time=0.146, loss_ctc=64.870, loss_att=42.763, acc=0.727, loss=49.396, backward_time=1.026, grad_norm=109.345, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.501e-05, train_time=2.710
+[gpub002:0/64] 2023-07-13 06:29:35,085 (trainer:732) INFO: 43epoch:train:3201-3300batch: iter_time=1.258e-04, forward_time=0.146, loss_ctc=66.320, loss_att=48.097, acc=0.724, loss=53.564, backward_time=1.026, grad_norm=116.268, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.500e-05, train_time=2.710
+[gpub002:0/64] 2023-07-13 06:30:19,781 (multiple_iter_factory:32) INFO: Building 4th iter-factory...
+[gpub002:0/64] 2023-07-13 06:30:37,996 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub002:0/64] 2023-07-13 06:30:41,419 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"}
+  preprocess: )
+[gpub002:0/64] 2023-07-13 06:30:41,419 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9,
+[gpub002:0/64] 2023-07-13 06:30:41,425 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub002:0/64] 2023-07-13 06:36:35,849 (trainer:732) INFO: 43epoch:train:3301-3400batch: iter_time=1.288, forward_time=0.145, loss_ctc=70.209, loss_att=56.960, acc=0.702, loss=60.934, backward_time=1.040, grad_norm=127.952, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.499e-05, train_time=8.415
+[gpub002:0/64] 2023-07-13 06:38:52,517 (trainer:732) INFO: 43epoch:train:3401-3500batch: iter_time=1.077e-04, forward_time=0.147, loss_ctc=70.143, loss_att=50.603, acc=0.720, loss=56.465, backward_time=1.032, grad_norm=141.257, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.499e-05, train_time=2.733
+[gpub002:0/64] 2023-07-13 06:41:08,598 (trainer:732) INFO: 43epoch:train:3501-3600batch: iter_time=1.134e-04, forward_time=0.146, loss_ctc=65.845, loss_att=47.427, acc=0.720, loss=52.953, backward_time=1.031, grad_norm=125.431, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.498e-05, train_time=2.721
+[gpub002:0/64] 2023-07-13 06:43:24,520 (trainer:732) INFO: 43epoch:train:3601-3700batch: iter_time=1.134e-04, forward_time=0.146, loss_ctc=77.247, loss_att=55.054, acc=0.710, loss=61.712, backward_time=1.028, grad_norm=118.561, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.497e-05, train_time=2.718
+[gpub002:0/64] 2023-07-13 06:45:40,581 (trainer:732) INFO: 43epoch:train:3701-3800batch: iter_time=1.182e-04, forward_time=0.147, loss_ctc=65.852, loss_att=50.169, acc=0.722, loss=54.874, backward_time=1.031, grad_norm=106.865, clip=100.000, loss_scale=4.803e+32, optim_step_time=0.182, optim0_lr0=5.497e-05, train_time=2.721
+[gpub002:0/64] 2023-07-13 06:47:59,361 (trainer:732) INFO: 43epoch:train:3801-3900batch: iter_time=1.074e-04, forward_time=0.148, loss_ctc=72.644, loss_att=54.032, acc=0.710, loss=59.615, backward_time=1.031, grad_norm=136.386, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.496e-05, train_time=2.775
+[gpub002:0/64] 2023-07-13 06:50:15,237 (trainer:732) INFO: 43epoch:train:3901-4000batch: iter_time=1.063e-04, forward_time=0.147, loss_ctc=64.068, loss_att=40.961, acc=0.734, loss=47.893, backward_time=1.029, grad_norm=130.607, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.495e-05, train_time=2.717
+[gpub002:0/64] 2023-07-13 06:52:31,943 (trainer:732) INFO: 43epoch:train:4001-4100batch: iter_time=1.034e-04, forward_time=0.147, loss_ctc=65.865, loss_att=48.470, acc=0.723, loss=53.688, backward_time=1.030, grad_norm=141.896, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.495e-05, train_time=2.734
+[gpub002:0/64] 2023-07-13 06:54:15,645 (multiple_iter_factory:32) INFO: Building 5th iter-factory...
+[gpub002:0/64] 2023-07-13 06:54:33,672 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub002:0/64] 2023-07-13 06:54:37,089 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"}
+  preprocess: )
+[gpub002:0/64] 2023-07-13 06:54:37,089 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3,
+[gpub002:0/64] 2023-07-13 06:54:37,095 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub002:0/64] 2023-07-13 06:58:54,596 (trainer:732) INFO: 43epoch:train:4101-4200batch: iter_time=1.344, forward_time=0.226, loss_ctc=68.341, loss_att=49.394, acc=0.720, loss=55.078, backward_time=1.069, grad_norm=135.878, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.186, optim0_lr0=5.494e-05, train_time=7.653
+[gpub002:0/64] 2023-07-13 07:01:12,004 (trainer:732) INFO: 43epoch:train:4201-4300batch: iter_time=1.012e-04, forward_time=0.148, loss_ctc=68.518, loss_att=52.143, acc=0.714, loss=57.055, backward_time=1.031, grad_norm=164.714, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.183, optim0_lr0=5.493e-05, train_time=2.748
+[gpub002:0/64] 2023-07-13 07:03:28,068 (trainer:732) INFO: 43epoch:train:4301-4400batch: iter_time=1.019e-04, forward_time=0.146, loss_ctc=78.283, loss_att=56.097, acc=0.717, loss=62.753, backward_time=1.031, grad_norm=131.835, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.183, optim0_lr0=5.493e-05, train_time=2.721
+[gpub002:0/64] 2023-07-13 07:05:44,046 (trainer:732) INFO: 43epoch:train:4401-4500batch: iter_time=1.003e-04, forward_time=0.146, loss_ctc=61.875, loss_att=43.498, acc=0.722, loss=49.011, backward_time=1.030, grad_norm=101.796, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.492e-05, train_time=2.719
+[gpub002:0/64] 2023-07-13 07:08:00,105 (trainer:732) INFO: 43epoch:train:4501-4600batch: iter_time=9.596e-05, forward_time=0.145, loss_ctc=80.642, loss_att=61.038, acc=0.717, loss=66.919, backward_time=1.029, grad_norm=116.023, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.183, optim0_lr0=5.491e-05, train_time=2.721
+[gpub002:0/64] 2023-07-13 07:10:39,483 (trainer:732) INFO: 43epoch:train:4601-4700batch: iter_time=1.048e-04, forward_time=0.146, loss_ctc=62.748, loss_att=49.774, acc=0.715, loss=53.666, backward_time=1.071, grad_norm=110.121, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.183, optim0_lr0=5.491e-05, train_time=3.187
+[gpub002:0/64] 2023-07-13 07:12:58,455 (trainer:732) INFO: 43epoch:train:4701-4800batch: iter_time=1.119e-04, forward_time=0.146, loss_ctc=69.387, loss_att=46.901, acc=0.713, loss=53.646, backward_time=1.043, grad_norm=122.959, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.183, optim0_lr0=5.490e-05, train_time=2.779
+[gpub002:0/64] 2023-07-13 07:15:14,699 (trainer:732) INFO: 43epoch:train:4801-4900batch: iter_time=1.047e-04, forward_time=0.146, loss_ctc=63.195, loss_att=42.368, acc=0.732, loss=48.616, backward_time=1.029, grad_norm=110.403, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.489e-05, train_time=2.725
+[gpub002:0/64] 2023-07-13 07:16:46,779 (trainer:663) WARNING: The grad norm is nan. Skipping updating the model.
+[gpub002:0/64] 2023-07-13 07:17:31,636 (multiple_iter_factory:32) INFO: Building 6th iter-factory...
+[gpub002:0/64] 2023-07-13 07:17:49,916 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub002:0/64] 2023-07-13 07:17:53,361 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"}
+  preprocess: )
+[gpub002:0/64] 2023-07-13 07:17:53,361 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4,
+[gpub002:0/64] 2023-07-13 07:17:53,368 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub002:0/64] 2023-07-13 07:21:12,580 (trainer:732) INFO: 43epoch:train:4901-5000batch: iter_time=1.363, forward_time=0.145, loss_ctc=69.506, loss_att=50.192, acc=0.737, loss=55.987, backward_time=1.033, grad_norm=111.488, clip=100.000, loss_scale=5.431e+32, optim_step_time=0.182, optim0_lr0=5.489e-05, train_time=7.157
+[gpub002:0/64] 2023-07-13 07:23:30,148 (trainer:732) INFO: 43epoch:train:5001-5100batch: iter_time=1.400e-04, forward_time=0.148, loss_ctc=65.881, loss_att=50.693, acc=0.708, loss=55.250, backward_time=1.037, grad_norm=122.810, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.488e-05, train_time=2.751
+[gpub002:0/64] 2023-07-13 07:25:46,716 (trainer:732) INFO: 43epoch:train:5101-5200batch: iter_time=1.426e-04, forward_time=0.147, loss_ctc=67.325, loss_att=50.408, acc=0.713, loss=55.483, backward_time=1.029, grad_norm=122.888, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.487e-05, train_time=2.731
+[gpub002:0/64] 2023-07-13 07:28:02,507 (trainer:732) INFO: 43epoch:train:5201-5300batch: iter_time=1.599e-04, forward_time=0.146, loss_ctc=69.909, loss_att=49.226, acc=0.722, loss=55.431, backward_time=1.029, grad_norm=113.819, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.487e-05, train_time=2.716
+[gpub002:0/64] 2023-07-13 07:28:23,967 (trainer:663) WARNING: The grad norm is nan. Skipping updating the model.
+[gpub002:0/64] 2023-07-13 07:30:18,027 (trainer:732) INFO: 43epoch:train:5301-5400batch: iter_time=1.310e-04, forward_time=0.146, loss_ctc=73.830, loss_att=53.727, acc=0.707, loss=59.758, backward_time=1.028, grad_norm=126.012, clip=100.000, loss_scale=1.854e+32, optim_step_time=0.182, optim0_lr0=5.486e-05, train_time=2.710
+[gpub002:0/64] 2023-07-13 07:32:33,556 (trainer:732) INFO: 43epoch:train:5401-5500batch: iter_time=1.358e-04, forward_time=0.145, loss_ctc=69.457, loss_att=53.006, acc=0.711, loss=57.941, backward_time=1.027, grad_norm=118.602, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.183, optim0_lr0=5.485e-05, train_time=2.710
+[gpub002:0/64] 2023-07-13 07:34:49,617 (trainer:732) INFO: 43epoch:train:5501-5600batch: iter_time=1.294e-04, forward_time=0.147, loss_ctc=70.674, loss_att=52.567, acc=0.708, loss=57.999, backward_time=1.029, grad_norm=124.421, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.183, optim0_lr0=5.485e-05, train_time=2.721
+[gpub002:0/64] 2023-07-13 07:37:05,125 (trainer:732) INFO: 43epoch:train:5601-5700batch: iter_time=1.538e-04, forward_time=0.146, loss_ctc=64.632, loss_att=42.627, acc=0.728, loss=49.228, backward_time=1.026, grad_norm=117.128, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.183, optim0_lr0=5.484e-05, train_time=2.710
+[gpub002:0/64] 2023-07-13 07:39:20,847 (trainer:732) INFO: 43epoch:train:5701-5800batch: iter_time=1.408e-04, forward_time=0.147, loss_ctc=65.046, loss_att=47.284, acc=0.726, loss=52.613, backward_time=1.028, grad_norm=118.675, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.183, optim0_lr0=5.483e-05, train_time=2.714
+[gpub002:0/64] 2023-07-13 07:40:20,191 (multiple_iter_factory:32) INFO: Building 7th iter-factory...
+[gpub002:0/64] 2023-07-13 07:40:38,345 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub002:0/64] 2023-07-13 07:40:41,837 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"}
+  preprocess: )
+[gpub002:0/64] 2023-07-13 07:40:41,837 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10,
+[gpub002:0/64] 2023-07-13 07:40:41,843 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub002:0/64] 2023-07-13 07:46:21,327 (trainer:732) INFO: 43epoch:train:5801-5900batch: iter_time=2.635, forward_time=0.147, loss_ctc=69.559, loss_att=55.942, acc=0.705, loss=60.027, backward_time=1.049, grad_norm=111.034, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.183, optim0_lr0=5.483e-05, train_time=8.409
+[gpub002:0/64] 2023-07-13 07:48:37,870 (trainer:732) INFO: 43epoch:train:5901-6000batch: iter_time=1.176e-04, forward_time=0.145, loss_ctc=71.663, loss_att=52.278, acc=0.708, loss=58.094, backward_time=1.029, grad_norm=167.485, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.183, optim0_lr0=5.482e-05, train_time=2.731
+[gpub002:0/64] 2023-07-13 07:50:53,784 (trainer:732) INFO: 43epoch:train:6001-6100batch: iter_time=1.131e-04, forward_time=0.146, loss_ctc=65.614, loss_att=46.575, acc=0.717, loss=52.287, backward_time=1.029, grad_norm=119.203, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.183, optim0_lr0=5.481e-05, train_time=2.718
+[gpub002:0/64] 2023-07-13 07:53:27,460 (trainer:732) INFO: 43epoch:train:6101-6200batch: iter_time=1.129e-04, forward_time=0.144, loss_ctc=75.060, loss_att=54.210, acc=0.707, loss=60.465, backward_time=1.046, grad_norm=113.546, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.183, optim0_lr0=5.481e-05, train_time=3.073
+[gpub002:0/64] 2023-07-13 07:55:51,083 (trainer:732) INFO: 43epoch:train:6201-6300batch: iter_time=3.027e-04, forward_time=0.179, loss_ctc=66.883, loss_att=50.860, acc=0.711, loss=55.667, backward_time=1.034, grad_norm=113.637, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.183, optim0_lr0=5.480e-05, train_time=2.872
+[gpub002:0/64] 2023-07-13 07:58:09,272 (trainer:732) INFO: 43epoch:train:6301-6400batch: iter_time=1.044e-04, forward_time=0.163, loss_ctc=73.490, loss_att=54.979, acc=0.704, loss=60.532, backward_time=1.030, grad_norm=134.206, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.183, optim0_lr0=5.479e-05, train_time=2.764
+[gpub002:0/64] 2023-07-13 08:00:24,736 (trainer:732) INFO: 43epoch:train:6401-6500batch: iter_time=1.026e-04, forward_time=0.144, loss_ctc=65.033, loss_att=41.165, acc=0.730, loss=48.326, backward_time=1.025, grad_norm=125.179, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.479e-05, train_time=2.709
+[gpub002:0/64] 2023-07-13 08:02:40,579 (trainer:732) INFO: 43epoch:train:6501-6600batch: iter_time=1.147e-04, forward_time=0.144, loss_ctc=67.103, loss_att=47.658, acc=0.719, loss=53.491, backward_time=1.028, grad_norm=121.695, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.478e-05, train_time=2.717
+[gpub002:0/64] 2023-07-13 08:04:11,900 (multiple_iter_factory:32) INFO: Building 8th iter-factory...
+[gpub002:0/64] 2023-07-13 08:04:30,016 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub002:0/64] 2023-07-13 08:04:33,389 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"}
+  preprocess: )
+[gpub002:0/64] 2023-07-13 08:04:33,389 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6,
+[gpub002:0/64] 2023-07-13 08:04:33,395 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub002:0/64] 2023-07-13 08:09:38,546 (trainer:732) INFO: 43epoch:train:6601-6700batch: iter_time=1.336, forward_time=0.186, loss_ctc=69.082, loss_att=51.109, acc=0.714, loss=56.501, backward_time=1.038, grad_norm=108.294, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.185, optim0_lr0=5.477e-05, train_time=8.359
+[gpub002:0/64] 2023-07-13 08:10:34,386 (trainer:663) WARNING: The grad norm is nan. Skipping updating the model.
+[gpub002:0/64] 2023-07-13 08:12:01,808 (trainer:732) INFO: 43epoch:train:6701-6800batch: iter_time=1.327e-04, forward_time=0.145, loss_ctc=67.704, loss_att=51.773, acc=0.711, loss=56.552, backward_time=1.037, grad_norm=130.635, clip=100.000, loss_scale=1.109e+32, optim_step_time=0.182, optim0_lr0=5.477e-05, train_time=2.865
+[gpub002:0/64] 2023-07-13 08:14:23,253 (trainer:732) INFO: 43epoch:train:6801-6900batch: iter_time=1.293e-04, forward_time=0.145, loss_ctc=77.182, loss_att=56.548, acc=0.705, loss=62.738, backward_time=1.042, grad_norm=118.840, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.476e-05, train_time=2.829
+[gpub002:0/64] 2023-07-13 08:16:48,767 (trainer:732) INFO: 43epoch:train:6901-7000batch: iter_time=1.451e-04, forward_time=0.145, loss_ctc=60.933, loss_att=43.482, acc=0.718, loss=48.717, backward_time=1.039, grad_norm=142.200, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.476e-05, train_time=2.910
+[gpub002:0/64] 2023-07-13 08:19:18,077 (trainer:732) INFO: 43epoch:train:7001-7100batch: iter_time=1.466e-04, forward_time=0.146, loss_ctc=80.225, loss_att=60.938, acc=0.714, loss=66.724, backward_time=1.043, grad_norm=128.693, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.475e-05, train_time=2.986
+[gpub002:0/64] 2023-07-13 08:21:41,636 (trainer:732) INFO: 43epoch:train:7101-7200batch: iter_time=1.517e-04, forward_time=0.146, loss_ctc=62.221, loss_att=49.382, acc=0.708, loss=53.234, backward_time=1.042, grad_norm=137.390, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.183, optim0_lr0=5.474e-05, train_time=2.871
+[gpub002:0/64] 2023-07-13 08:24:09,625 (trainer:732) INFO: 43epoch:train:7201-7300batch: iter_time=1.485e-04, forward_time=0.145, loss_ctc=69.377, loss_att=47.292, acc=0.709, loss=53.917, backward_time=1.058, grad_norm=123.920, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.474e-05, train_time=2.960
+[gpub002:0/64] 2023-07-13 08:26:25,316 (trainer:732) INFO: 43epoch:train:7301-7400batch: iter_time=1.355e-04, forward_time=0.146, loss_ctc=62.734, loss_att=42.130, acc=0.731, loss=48.311, backward_time=1.028, grad_norm=112.463, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.473e-05, train_time=2.714
+[gpub002:0/64] 2023-07-13 08:28:42,136 (multiple_iter_factory:32) INFO: Building 9th iter-factory...
+[gpub002:0/64] 2023-07-13 08:29:00,185 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub002:0/64] 2023-07-13 08:29:03,836 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"}
+  preprocess: )
+[gpub002:0/64] 2023-07-13 08:29:03,836 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11,
+[gpub002:0/64] 2023-07-13 08:29:03,842 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub002:0/64] 2023-07-13 08:31:55,060 (trainer:732) INFO: 43epoch:train:7401-7500batch: iter_time=1.410, forward_time=0.173, loss_ctc=70.908, loss_att=52.086, acc=0.730, loss=57.732, backward_time=1.031, grad_norm=103.567, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.183, optim0_lr0=5.472e-05, train_time=6.595
+[gpub002:0/64] 2023-07-13 08:34:14,214 (trainer:732) INFO: 43epoch:train:7501-7600batch: iter_time=1.180e-04, forward_time=0.147, loss_ctc=66.889, loss_att=51.014, acc=0.718, loss=55.777, backward_time=1.039, grad_norm=122.067, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.183, optim0_lr0=5.472e-05, train_time=2.783
+[gpub002:0/64] 2023-07-13 08:36:31,213 (trainer:732) INFO: 43epoch:train:7601-7700batch: iter_time=1.128e-04, forward_time=0.146, loss_ctc=76.393, loss_att=55.664, acc=0.714, loss=61.882, backward_time=1.029, grad_norm=138.860, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.471e-05, train_time=2.740
+[gpub002:0/64] 2023-07-13 08:38:47,398 (trainer:732) INFO: 43epoch:train:7701-7800batch: iter_time=1.169e-04, forward_time=0.146, loss_ctc=62.356, loss_att=44.020, acc=0.725, loss=49.521, backward_time=1.030, grad_norm=118.319, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.183, optim0_lr0=5.470e-05, train_time=2.723
+[gpub002:0/64] 2023-07-13 08:41:05,554 (trainer:732) INFO: 43epoch:train:7801-7900batch: iter_time=1.119e-04, forward_time=0.146, loss_ctc=79.362, loss_att=60.799, acc=0.719, loss=66.368, backward_time=1.031, grad_norm=129.499, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.470e-05, train_time=2.763
+[gpub002:0/64] 2023-07-13 08:43:32,372 (trainer:732) INFO: 43epoch:train:7901-8000batch: iter_time=1.138e-04, forward_time=0.145, loss_ctc=62.046, loss_att=49.851, acc=0.708, loss=53.509, backward_time=1.046, grad_norm=124.743, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.181, optim0_lr0=5.469e-05, train_time=2.936
+[gpub002:0/64] 2023-07-13 08:45:48,570 (trainer:732) INFO: 43epoch:train:8001-8100batch: iter_time=1.225e-04, forward_time=0.144, loss_ctc=67.225, loss_att=44.208, acc=0.725, loss=51.113, backward_time=1.027, grad_norm=118.012, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.468e-05, train_time=2.724
+[gpub002:0/64] 2023-07-13 08:48:07,719 (trainer:732) INFO: 43epoch:train:8101-8200batch: iter_time=1.209e-04, forward_time=0.146, loss_ctc=64.666, loss_att=43.302, acc=0.727, loss=49.711, backward_time=1.038, grad_norm=118.883, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.468e-05, train_time=2.783
+[gpub002:0/64] 2023-07-13 08:50:32,893 (trainer:732) INFO: 43epoch:train:8201-8300batch: iter_time=1.252e-04, forward_time=0.145, loss_ctc=70.428, loss_att=52.218, acc=0.734, loss=57.681, backward_time=1.049, grad_norm=109.704, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.467e-05, train_time=2.903
+[gpub002:0/64] 2023-07-13 08:51:36,412 (multiple_iter_factory:32) INFO: Building 10th iter-factory...
+[gpub002:0/64] 2023-07-13 08:51:54,619 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub002:0/64] 2023-07-13 08:51:58,008 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"}
+  preprocess: )
+[gpub002:0/64] 2023-07-13 08:51:58,008 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8,
+[gpub002:0/64] 2023-07-13 08:51:58,097 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub002:0/64] 2023-07-13 08:58:16,119 (trainer:732) INFO: 43epoch:train:8301-8400batch: iter_time=3.044, forward_time=0.205, loss_ctc=68.169, loss_att=54.539, acc=0.704, loss=58.628, backward_time=1.054, grad_norm=109.947, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.186, optim0_lr0=5.466e-05, train_time=9.264
+[gpub002:0/64] 2023-07-13 09:00:33,779 (trainer:732) INFO: 43epoch:train:8401-8500batch: iter_time=1.245e-04, forward_time=0.145, loss_ctc=71.006, loss_att=51.275, acc=0.715, loss=57.194, backward_time=1.029, grad_norm=111.833, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.466e-05, train_time=2.753
+[gpub002:0/64] 2023-07-13 09:02:50,313 (trainer:732) INFO: 43epoch:train:8501-8600batch: iter_time=1.221e-04, forward_time=0.146, loss_ctc=65.216, loss_att=46.304, acc=0.720, loss=51.978, backward_time=1.031, grad_norm=114.151, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.465e-05, train_time=2.730
+[gpub002:0/64] 2023-07-13 09:05:11,317 (trainer:732) INFO: 43epoch:train:8601-8700batch: iter_time=1.264e-04, forward_time=0.144, loss_ctc=75.105, loss_att=54.233, acc=0.710, loss=60.494, backward_time=1.034, grad_norm=141.900, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.464e-05, train_time=2.820
+[gpub002:0/64] 2023-07-13 09:07:26,944 (trainer:732) INFO: 43epoch:train:8701-8800batch: iter_time=1.226e-04, forward_time=0.144, loss_ctc=65.886, loss_att=50.674, acc=0.710, loss=55.238, backward_time=1.027, grad_norm=114.949, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.464e-05, train_time=2.712
+[gpub002:0/64] 2023-07-13 09:09:54,294 (trainer:732) INFO: 43epoch:train:8801-8900batch: iter_time=1.217e-04, forward_time=0.145, loss_ctc=72.328, loss_att=54.323, acc=0.708, loss=59.725, backward_time=1.040, grad_norm=129.091, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.463e-05, train_time=2.947
+[gpub002:0/64] 2023-07-13 09:12:09,955 (trainer:732) INFO: 43epoch:train:8901-9000batch: iter_time=1.209e-04, forward_time=0.144, loss_ctc=62.790, loss_att=40.413, acc=0.733, loss=47.126, backward_time=1.026, grad_norm=123.009, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.181, optim0_lr0=5.462e-05, train_time=2.713
+[gpub002:0/64] 2023-07-13 09:14:30,267 (trainer:732) INFO: 43epoch:train:9001-9100batch: iter_time=1.184e-04, forward_time=0.144, loss_ctc=65.546, loss_att=47.434, acc=0.723, loss=52.868, backward_time=1.030, grad_norm=126.202, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.181, optim0_lr0=5.462e-05, train_time=2.806
+[gpub002:0/64] 2023-07-13 09:16:03,133 (multiple_iter_factory:32) INFO: Building 11th iter-factory...
+[gpub002:0/64] 2023-07-13 09:16:21,262 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub002:0/64] 2023-07-13 09:16:24,730 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"}
+  preprocess: )
+[gpub002:0/64] 2023-07-13 09:16:24,730 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5,
+[gpub002:0/64] 2023-07-13 09:16:24,737 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub002:0/64] 2023-07-13 09:19:55,353 (trainer:732) INFO: 43epoch:train:9101-9200batch: iter_time=1.299, forward_time=0.166, loss_ctc=68.022, loss_att=50.137, acc=0.719, loss=55.502, backward_time=1.039, grad_norm=114.933, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.461e-05, train_time=6.502
+[gpub002:0/64] 2023-07-13 09:22:18,425 (trainer:732) INFO: 43epoch:train:9201-9300batch: iter_time=0.002, forward_time=0.182, loss_ctc=67.757, loss_att=51.479, acc=0.719, loss=56.362, backward_time=1.044, grad_norm=114.798, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.184, optim0_lr0=5.460e-05, train_time=2.861
+[gpub002:0/64] 2023-07-13 09:24:35,968 (trainer:732) INFO: 43epoch:train:9301-9400batch: iter_time=1.123e-04, forward_time=0.148, loss_ctc=75.353, loss_att=56.327, acc=0.716, loss=62.035, backward_time=1.030, grad_norm=143.157, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.460e-05, train_time=2.751
+[gpub002:0/64] 2023-07-13 09:26:52,066 (trainer:732) INFO: 43epoch:train:9401-9500batch: iter_time=1.172e-04, forward_time=0.145, loss_ctc=63.167, loss_att=43.706, acc=0.722, loss=49.544, backward_time=1.028, grad_norm=124.127, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.459e-05, train_time=2.722
+[gpub002:0/64] 2023-07-13 09:29:08,787 (trainer:732) INFO: 43epoch:train:9501-9600batch: iter_time=1.175e-04, forward_time=0.145, loss_ctc=78.082, loss_att=59.968, acc=0.719, loss=65.402, backward_time=1.029, grad_norm=139.813, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.459e-05, train_time=2.734
+[gpub002:0/64] 2023-07-13 09:31:24,647 (trainer:732) INFO: 43epoch:train:9601-9700batch: iter_time=1.180e-04, forward_time=0.145, loss_ctc=62.075, loss_att=49.317, acc=0.717, loss=53.145, backward_time=1.028, grad_norm=129.904, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.458e-05, train_time=2.717
+[gpub002:0/64] 2023-07-13 09:33:40,464 (trainer:732) INFO: 43epoch:train:9701-9800batch: iter_time=1.205e-04, forward_time=0.145, loss_ctc=67.821, loss_att=46.388, acc=0.718, loss=52.818, backward_time=1.029, grad_norm=116.293, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.457e-05, train_time=2.716
+[gpub002:0/64] 2023-07-13 09:35:56,258 (trainer:732) INFO: 43epoch:train:9801-9900batch: iter_time=1.285e-04, forward_time=0.146, loss_ctc=62.427, loss_att=40.698, acc=0.739, loss=47.217, backward_time=1.028, grad_norm=116.885, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.457e-05, train_time=2.716
+[gpub002:0/64] 2023-07-13 09:38:11,996 (trainer:732) INFO: 43epoch:train:9901-10000batch: iter_time=1.175e-04, forward_time=0.146, loss_ctc=69.128, loss_att=50.402, acc=0.737, loss=56.020, backward_time=1.028, grad_norm=113.548, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.456e-05, train_time=2.715
+[gpub002:0/64] 2023-07-13 09:51:01,601 (trainer:338) INFO: 43epoch results: [train] iter_time=0.220, forward_time=0.151, loss_ctc=69.174, loss_att=50.360, acc=0.716, loss=56.005, backward_time=1.034, grad_norm=122.264, clip=100.000, loss_scale=2.614e+32, optim_step_time=0.182, optim0_lr0=5.488e-05, train_time=3.443, time=4 hours, 47 minutes and 9.28 seconds, total_count=400000, gpu_max_cached_mem_GB=37.574, [valid] loss_ctc=44.035, cer_ctc=0.257, loss_att=36.329, acc=0.704, cer=0.323, wer=0.986, loss=38.641, time=6 minutes and 40.54 seconds, total_count=40986, gpu_max_cached_mem_GB=37.574, [att_plot] time=5 minutes and 57.64 seconds, total_count=0, gpu_max_cached_mem_GB=37.574
+[gpub002:0/64] 2023-07-13 09:51:20,539 (trainer:386) INFO: The best model has been updated: valid.acc, valid.total_count
+[gpub002:0/64] 2023-07-13 09:51:20,601 (trainer:440) INFO: The model files were removed: exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/23epoch.pth
+[gpub002:0/64] 2023-07-13 09:51:20,630 (trainer:272) INFO: 44/50epoch started. Estimated time to finish: 1 day, 11 hours and 55 minutes
+[gpub002:0/64] 2023-07-13 09:51:21,201 (multiple_iter_factory:32) INFO: Building 0th iter-factory...
+[gpub002:0/64] 2023-07-13 09:51:40,196 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-13 09:51:43,685 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-13 09:51:43,685 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3, +[gpub002:0/64] 2023-07-13 09:51:43,691 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-13 09:59:49,160 (trainer:732) INFO: 44epoch:train:1-100batch: iter_time=3.652, forward_time=0.172, loss_ctc=71.282, loss_att=54.434, acc=0.717, loss=59.488, backward_time=1.044, grad_norm=125.239, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.183, optim0_lr0=5.455e-05, train_time=10.160 +[gpub002:0/64] 2023-07-13 10:02:06,233 (trainer:732) INFO: 44epoch:train:101-200batch: iter_time=1.160e-04, forward_time=0.145, loss_ctc=74.775, loss_att=56.050, acc=0.700, loss=61.668, backward_time=1.031, grad_norm=122.349, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.455e-05, train_time=2.742 +[gpub002:0/64] 2023-07-13 10:04:22,775 (trainer:732) INFO: 44epoch:train:201-300batch: iter_time=1.130e-04, forward_time=0.146, loss_ctc=87.979, loss_att=66.421, acc=0.708, loss=72.889, backward_time=1.032, grad_norm=117.444, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.454e-05, train_time=2.731 +[gpub002:0/64] 2023-07-13 10:06:38,707 (trainer:732) INFO: 44epoch:train:301-400batch: iter_time=1.104e-04, forward_time=0.144, loss_ctc=70.492, loss_att=49.371, acc=0.696, loss=55.707, backward_time=1.027, grad_norm=120.741, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.453e-05, train_time=2.718 +[gpub002:0/64] 2023-07-13 10:08:58,696 (trainer:732) INFO: 44epoch:train:401-500batch: iter_time=1.123e-04, forward_time=0.144, loss_ctc=76.164, loss_att=60.005, acc=0.695, loss=64.852, backward_time=1.030, grad_norm=120.304, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.453e-05, train_time=2.800 +[gpub002:0/64] 2023-07-13 10:11:18,222 (trainer:732) INFO: 44epoch:train:501-600batch: iter_time=1.088e-04, forward_time=0.144, loss_ctc=86.092, loss_att=56.917, acc=0.713, loss=65.670, backward_time=1.031, grad_norm=137.885, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.452e-05, train_time=2.790 +[gpub002:0/64] 2023-07-13 10:13:52,188 (trainer:732) INFO: 44epoch:train:601-700batch: iter_time=1.207e-04, forward_time=0.145, loss_ctc=72.733, loss_att=54.095, acc=0.720, loss=59.686, backward_time=1.042, grad_norm=123.779, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.451e-05, train_time=3.079 +[gpub002:0/64] 2023-07-13 10:16:22,649 (trainer:732) INFO: 44epoch:train:701-800batch: iter_time=1.176e-04, forward_time=0.145, loss_ctc=76.359, loss_att=63.914, acc=0.695, loss=67.647, backward_time=1.055, grad_norm=120.020, clip=100.000, loss_scale=1.314e+32, optim_step_time=0.182, optim0_lr0=5.451e-05, 
train_time=3.009 +[gpub002:0/64] 2023-07-13 10:17:24,426 (multiple_iter_factory:32) INFO: Building 1th iter-factory... +[gpub002:0/64] 2023-07-13 10:17:42,352 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-13 10:17:45,723 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-13 10:17:45,723 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2, +[gpub002:0/64] 2023-07-13 10:17:45,730 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-13 10:24:20,092 (trainer:732) INFO: 44epoch:train:801-900batch: iter_time=3.299, forward_time=0.177, loss_ctc=75.207, loss_att=54.676, acc=0.718, loss=60.835, backward_time=1.047, grad_norm=125.033, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.184, optim0_lr0=5.450e-05, train_time=9.548 +[gpub002:0/64] 2023-07-13 10:26:36,519 (trainer:732) INFO: 44epoch:train:901-1000batch: iter_time=1.216e-04, forward_time=0.145, loss_ctc=74.822, loss_att=57.463, acc=0.693, loss=62.670, backward_time=1.028, grad_norm=130.012, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.449e-05, train_time=2.729 +[gpub002:0/64] 2023-07-13 10:28:53,173 (trainer:732) INFO: 44epoch:train:1001-1100batch: iter_time=1.127e-04, forward_time=0.147, loss_ctc=83.374, loss_att=63.558, acc=0.698, loss=69.502, backward_time=1.030, grad_norm=144.799, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.449e-05, train_time=2.733 +[gpub002:0/64] 2023-07-13 10:31:09,555 (trainer:732) INFO: 44epoch:train:1101-1200batch: iter_time=1.185e-04, forward_time=0.145, loss_ctc=77.286, loss_att=52.646, acc=0.715, loss=60.038, backward_time=1.031, grad_norm=129.265, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.448e-05, train_time=2.727 +[gpub002:0/64] 2023-07-13 10:33:25,148 (trainer:732) INFO: 44epoch:train:1201-1300batch: iter_time=1.248e-04, forward_time=0.143, loss_ctc=71.158, loss_att=53.092, acc=0.697, loss=58.512, backward_time=1.026, grad_norm=97.542, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.447e-05, train_time=2.712 +[gpub002:0/64] 2023-07-13 10:35:40,885 (trainer:732) INFO: 44epoch:train:1301-1400batch: iter_time=1.248e-04, forward_time=0.145, loss_ctc=79.055, loss_att=55.984, acc=0.699, loss=62.905, backward_time=1.028, grad_norm=113.589, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.447e-05, train_time=2.715 +[gpub002:0/64] 2023-07-13 10:37:56,583 (trainer:732) INFO: 44epoch:train:1401-1500batch: iter_time=1.192e-04, forward_time=0.145, loss_ctc=76.489, loss_att=55.112, acc=0.717, loss=61.525, backward_time=1.027, grad_norm=124.920, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.446e-05, train_time=2.714 +[gpub002:0/64] 2023-07-13 10:40:12,676 (trainer:732) INFO: 44epoch:train:1501-1600batch: iter_time=1.402e-04, forward_time=0.145, loss_ctc=71.799, loss_att=59.313, acc=0.689, loss=63.059, 
backward_time=1.031, grad_norm=120.829, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.446e-05, train_time=2.722 +[gpub002:0/64] 2023-07-13 10:41:44,379 (multiple_iter_factory:32) INFO: Building 2th iter-factory... +[gpub002:0/64] 2023-07-13 10:42:02,839 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-13 10:42:06,277 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-13 10:42:06,277 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0, +[gpub002:0/64] 2023-07-13 10:42:06,283 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-13 10:45:52,981 (trainer:732) INFO: 44epoch:train:1601-1700batch: iter_time=1.287, forward_time=0.144, loss_ctc=80.146, loss_att=60.139, acc=0.710, loss=66.141, backward_time=1.038, grad_norm=125.126, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.445e-05, train_time=6.806 +[gpub002:0/64] 2023-07-13 10:48:09,910 (trainer:732) INFO: 44epoch:train:1701-1800batch: iter_time=1.102e-04, forward_time=0.146, loss_ctc=70.303, loss_att=54.003, acc=0.711, loss=58.893, backward_time=1.033, grad_norm=114.713, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.444e-05, train_time=2.738 +[gpub002:0/64] 2023-07-13 10:50:25,669 (trainer:732) INFO: 44epoch:train:1801-1900batch: iter_time=1.468e-04, forward_time=0.146, loss_ctc=76.075, loss_att=61.114, acc=0.693, loss=65.602, backward_time=1.028, grad_norm=138.441, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.444e-05, train_time=2.715 +[gpub002:0/64] 2023-07-13 10:52:41,672 (trainer:732) INFO: 44epoch:train:1901-2000batch: iter_time=1.075e-04, forward_time=0.144, loss_ctc=83.849, loss_att=56.957, acc=0.718, loss=65.025, backward_time=1.029, grad_norm=117.446, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.443e-05, train_time=2.720 +[gpub002:0/64] 2023-07-13 10:54:57,290 (trainer:732) INFO: 44epoch:train:2001-2100batch: iter_time=1.074e-04, forward_time=0.145, loss_ctc=67.877, loss_att=48.882, acc=0.702, loss=54.581, backward_time=1.026, grad_norm=113.885, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.442e-05, train_time=2.712 +[gpub002:0/64] 2023-07-13 10:57:13,426 (trainer:732) INFO: 44epoch:train:2101-2200batch: iter_time=9.840e-05, forward_time=0.144, loss_ctc=81.071, loss_att=59.103, acc=0.698, loss=65.693, backward_time=1.029, grad_norm=131.853, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.442e-05, train_time=2.723 +[gpub002:0/64] 2023-07-13 10:59:32,038 (trainer:732) INFO: 44epoch:train:2201-2300batch: iter_time=1.123e-04, forward_time=0.145, loss_ctc=74.573, loss_att=53.223, acc=0.709, loss=59.628, backward_time=1.030, grad_norm=119.358, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.441e-05, train_time=2.772 +[gpub002:0/64] 2023-07-13 11:01:50,330 (trainer:732) INFO: 
44epoch:train:2301-2400batch: iter_time=1.018e-04, forward_time=0.145, loss_ctc=75.625, loss_att=60.504, acc=0.695, loss=65.040, backward_time=1.041, grad_norm=128.773, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.440e-05, train_time=2.766 +[gpub002:0/64] 2023-07-13 11:04:07,084 (multiple_iter_factory:32) INFO: Building 3th iter-factory... +[gpub002:0/64] 2023-07-13 11:04:25,422 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-13 11:04:28,900 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-13 11:04:28,900 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4, +[gpub002:0/64] 2023-07-13 11:04:28,906 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-13 11:08:22,215 (trainer:732) INFO: 44epoch:train:2401-2500batch: iter_time=1.295, forward_time=0.144, loss_ctc=78.132, loss_att=62.868, acc=0.704, loss=67.447, backward_time=1.036, grad_norm=137.248, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.440e-05, train_time=7.837 +[gpub002:0/64] 2023-07-13 11:10:40,796 (trainer:732) INFO: 44epoch:train:2501-2600batch: iter_time=1.172e-04, forward_time=0.144, loss_ctc=67.608, loss_att=49.652, acc=0.723, loss=55.039, backward_time=1.036, grad_norm=147.316, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.439e-05, train_time=2.771 +[gpub002:0/64] 2023-07-13 11:12:56,584 (trainer:732) INFO: 44epoch:train:2601-2700batch: iter_time=1.208e-04, forward_time=0.144, loss_ctc=73.380, loss_att=58.680, acc=0.696, loss=63.090, backward_time=1.029, grad_norm=223.909, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.438e-05, train_time=2.716 +[gpub002:0/64] 2023-07-13 11:15:12,427 (trainer:732) INFO: 44epoch:train:2701-2800batch: iter_time=1.109e-04, forward_time=0.145, loss_ctc=86.240, loss_att=61.095, acc=0.709, loss=68.639, backward_time=1.027, grad_norm=160.636, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.438e-05, train_time=2.717 +[gpub002:0/64] 2023-07-13 11:17:27,987 (trainer:732) INFO: 44epoch:train:2801-2900batch: iter_time=1.162e-04, forward_time=0.144, loss_ctc=68.542, loss_att=47.662, acc=0.704, loss=53.926, backward_time=1.026, grad_norm=161.052, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.437e-05, train_time=2.711 +[gpub002:0/64] 2023-07-13 11:19:43,703 (trainer:732) INFO: 44epoch:train:2901-3000batch: iter_time=1.479e-04, forward_time=0.145, loss_ctc=74.838, loss_att=58.199, acc=0.693, loss=63.191, backward_time=1.028, grad_norm=127.448, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.437e-05, train_time=2.714 +[gpub002:0/64] 2023-07-13 11:21:59,433 (trainer:732) INFO: 44epoch:train:3001-3100batch: iter_time=1.218e-04, forward_time=0.144, loss_ctc=85.103, loss_att=56.031, acc=0.712, loss=64.753, backward_time=1.027, grad_norm=130.375, clip=100.000, 
loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.436e-05, train_time=2.714 +[gpub002:0/64] 2023-07-13 11:24:15,402 (trainer:732) INFO: 44epoch:train:3101-3200batch: iter_time=1.117e-04, forward_time=0.145, loss_ctc=70.848, loss_att=54.267, acc=0.711, loss=59.241, backward_time=1.029, grad_norm=107.471, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.435e-05, train_time=2.719 +[gpub002:0/64] 2023-07-13 11:26:31,345 (trainer:732) INFO: 44epoch:train:3201-3300batch: iter_time=1.136e-04, forward_time=0.145, loss_ctc=78.308, loss_att=65.274, acc=0.688, loss=69.184, backward_time=1.030, grad_norm=163.070, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.435e-05, train_time=2.719 +[gpub002:0/64] 2023-07-13 11:27:16,348 (multiple_iter_factory:32) INFO: Building 4th iter-factory... +[gpub002:0/64] 2023-07-13 11:27:34,316 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-13 11:27:37,764 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-13 11:27:37,764 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1, +[gpub002:0/64] 2023-07-13 11:27:37,771 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-13 11:34:38,710 (trainer:732) INFO: 44epoch:train:3301-3400batch: iter_time=1.305, forward_time=0.144, loss_ctc=71.977, loss_att=52.228, acc=0.724, loss=58.153, backward_time=1.042, grad_norm=140.230, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.434e-05, train_time=9.747 +[gpub002:0/64] 2023-07-13 11:36:55,659 (trainer:732) INFO: 44epoch:train:3401-3500batch: iter_time=1.283e-04, forward_time=0.145, loss_ctc=73.853, loss_att=55.371, acc=0.710, loss=60.915, backward_time=1.029, grad_norm=146.633, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.433e-05, train_time=2.739 +[gpub002:0/64] 2023-07-13 11:39:16,602 (trainer:732) INFO: 44epoch:train:3501-3600batch: iter_time=1.104e-04, forward_time=0.146, loss_ctc=82.166, loss_att=61.239, acc=0.713, loss=67.517, backward_time=1.055, grad_norm=156.465, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.193, optim0_lr0=5.433e-05, train_time=2.819 +[gpub002:0/64] 2023-07-13 11:41:35,797 (trainer:732) INFO: 44epoch:train:3601-3700batch: iter_time=1.001e-04, forward_time=0.146, loss_ctc=76.586, loss_att=51.918, acc=0.716, loss=59.318, backward_time=1.044, grad_norm=124.601, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.193, optim0_lr0=5.432e-05, train_time=2.784 +[gpub002:0/64] 2023-07-13 11:43:51,423 (trainer:732) INFO: 44epoch:train:3701-3800batch: iter_time=9.900e-05, forward_time=0.145, loss_ctc=69.296, loss_att=53.491, acc=0.701, loss=58.232, backward_time=1.027, grad_norm=124.208, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.431e-05, train_time=2.712 +[gpub002:0/64] 2023-07-13 11:46:07,819 (trainer:732) INFO: 44epoch:train:3801-3900batch: iter_time=1.039e-04, 
forward_time=0.147, loss_ctc=78.553, loss_att=56.210, acc=0.707, loss=62.913, backward_time=1.031, grad_norm=124.973, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.431e-05, train_time=2.728 +[gpub002:0/64] 2023-07-13 11:48:28,053 (trainer:732) INFO: 44epoch:train:3901-4000batch: iter_time=1.387e-04, forward_time=0.148, loss_ctc=74.950, loss_att=55.969, acc=0.726, loss=61.664, backward_time=1.037, grad_norm=137.290, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.430e-05, train_time=2.804 +[gpub002:0/64] 2023-07-13 11:50:48,088 (trainer:732) INFO: 44epoch:train:4001-4100batch: iter_time=1.290e-04, forward_time=0.148, loss_ctc=70.087, loss_att=56.682, acc=0.705, loss=60.703, backward_time=1.035, grad_norm=149.974, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.429e-05, train_time=2.800 +[gpub002:0/64] 2023-07-13 11:52:19,107 (multiple_iter_factory:32) INFO: Building 5th iter-factory... +[gpub002:0/64] 2023-07-13 11:52:37,276 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-13 11:52:40,687 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-13 11:52:40,687 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5, +[gpub002:0/64] 2023-07-13 11:52:40,693 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-13 11:59:04,406 (trainer:732) INFO: 44epoch:train:4101-4200batch: iter_time=1.307, forward_time=0.146, loss_ctc=78.980, loss_att=60.456, acc=0.713, loss=66.013, backward_time=1.053, grad_norm=139.801, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.429e-05, train_time=9.926 +[gpub002:0/64] 2023-07-13 12:01:20,686 (trainer:732) INFO: 44epoch:train:4201-4300batch: iter_time=1.136e-04, forward_time=0.146, loss_ctc=70.147, loss_att=53.310, acc=0.720, loss=58.362, backward_time=1.030, grad_norm=116.468, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.428e-05, train_time=2.725 +[gpub002:0/64] 2023-07-13 12:03:37,446 (trainer:732) INFO: 44epoch:train:4301-4400batch: iter_time=1.102e-04, forward_time=0.145, loss_ctc=76.582, loss_att=59.622, acc=0.708, loss=64.710, backward_time=1.032, grad_norm=126.272, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.428e-05, train_time=2.735 +[gpub002:0/64] 2023-07-13 12:05:53,280 (trainer:732) INFO: 44epoch:train:4401-4500batch: iter_time=1.157e-04, forward_time=0.145, loss_ctc=82.415, loss_att=56.209, acc=0.724, loss=64.071, backward_time=1.028, grad_norm=129.616, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.427e-05, train_time=2.716 +[gpub002:0/64] 2023-07-13 12:08:08,646 (trainer:732) INFO: 44epoch:train:4501-4600batch: iter_time=1.143e-04, forward_time=0.145, loss_ctc=65.252, loss_att=46.355, acc=0.705, loss=52.024, backward_time=1.024, grad_norm=116.158, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.426e-05, 
train_time=2.707 +[gpub002:0/64] 2023-07-13 12:10:24,749 (trainer:732) INFO: 44epoch:train:4601-4700batch: iter_time=1.158e-04, forward_time=0.145, loss_ctc=80.559, loss_att=58.522, acc=0.706, loss=65.133, backward_time=1.030, grad_norm=160.477, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.426e-05, train_time=2.722 +[gpub002:0/64] 2023-07-13 12:12:40,507 (trainer:732) INFO: 44epoch:train:4701-4800batch: iter_time=1.147e-04, forward_time=0.146, loss_ctc=76.103, loss_att=53.892, acc=0.722, loss=60.555, backward_time=1.027, grad_norm=118.492, clip=100.000, loss_scale=2.629e+32, optim_step_time=0.181, optim0_lr0=5.425e-05, train_time=2.715 +[gpub002:0/64] 2023-07-13 12:14:56,545 (trainer:732) INFO: 44epoch:train:4801-4900batch: iter_time=1.162e-04, forward_time=0.146, loss_ctc=76.099, loss_att=61.644, acc=0.708, loss=65.980, backward_time=1.030, grad_norm=121.373, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.424e-05, train_time=2.721 +[gpub002:0/64] 2023-07-13 12:17:11,370 (multiple_iter_factory:32) INFO: Building 6th iter-factory... +[gpub002:0/64] 2023-07-13 12:17:29,722 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-13 12:17:33,178 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-13 12:17:33,178 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10, +[gpub002:0/64] 2023-07-13 12:17:33,184 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-13 12:23:24,781 (trainer:732) INFO: 44epoch:train:4901-5000batch: iter_time=1.333, forward_time=0.145, loss_ctc=75.905, loss_att=57.847, acc=0.711, loss=63.264, backward_time=1.039, grad_norm=126.648, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.424e-05, train_time=10.165 +[gpub002:0/64] 2023-07-13 12:25:45,045 (trainer:732) INFO: 44epoch:train:5001-5100batch: iter_time=1.084e-04, forward_time=0.146, loss_ctc=70.805, loss_att=55.156, acc=0.702, loss=59.850, backward_time=1.041, grad_norm=116.577, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.423e-05, train_time=2.805 +[gpub002:0/64] 2023-07-13 12:28:00,589 (trainer:732) INFO: 44epoch:train:5101-5200batch: iter_time=1.271e-04, forward_time=0.145, loss_ctc=78.881, loss_att=61.680, acc=0.692, loss=66.840, backward_time=1.027, grad_norm=123.318, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.422e-05, train_time=2.711 +[gpub002:0/64] 2023-07-13 12:30:16,542 (trainer:732) INFO: 44epoch:train:5201-5300batch: iter_time=1.268e-04, forward_time=0.147, loss_ctc=80.888, loss_att=54.612, acc=0.726, loss=62.495, backward_time=1.029, grad_norm=126.226, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.422e-05, train_time=2.719 +[gpub002:0/64] 2023-07-13 12:32:32,337 (trainer:732) INFO: 44epoch:train:5301-5400batch: iter_time=1.301e-04, forward_time=0.146, loss_ctc=65.520, loss_att=48.765, acc=0.704, 
loss=53.792, backward_time=1.028, grad_norm=115.361, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.421e-05, train_time=2.716 +[gpub002:0/64] 2023-07-13 12:34:59,810 (trainer:732) INFO: 44epoch:train:5401-5500batch: iter_time=1.201e-04, forward_time=0.146, loss_ctc=77.603, loss_att=55.499, acc=0.696, loss=62.130, backward_time=1.038, grad_norm=131.812, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.421e-05, train_time=2.949 +[gpub002:0/64] 2023-07-13 12:37:15,536 (trainer:732) INFO: 44epoch:train:5501-5600batch: iter_time=1.168e-04, forward_time=0.144, loss_ctc=73.151, loss_att=52.695, acc=0.714, loss=58.832, backward_time=1.027, grad_norm=108.368, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.420e-05, train_time=2.714 +[gpub002:0/64] 2023-07-13 12:39:31,588 (trainer:732) INFO: 44epoch:train:5601-5700batch: iter_time=1.156e-04, forward_time=0.145, loss_ctc=77.041, loss_att=62.776, acc=0.692, loss=67.056, backward_time=1.030, grad_norm=117.006, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.419e-05, train_time=2.721 +[gpub002:0/64] 2023-07-13 12:41:47,589 (trainer:732) INFO: 44epoch:train:5701-5800batch: iter_time=1.217e-04, forward_time=0.145, loss_ctc=75.964, loss_att=57.575, acc=0.711, loss=63.092, backward_time=1.029, grad_norm=131.182, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.419e-05, train_time=2.720 +[gpub002:0/64] 2023-07-13 12:42:32,777 (multiple_iter_factory:32) INFO: Building 7th iter-factory... +[gpub002:0/64] 2023-07-13 12:42:51,004 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-13 12:42:54,458 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-13 12:42:54,458 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7, +[gpub002:0/64] 2023-07-13 12:42:54,464 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-13 12:48:25,155 (trainer:732) INFO: 44epoch:train:5801-5900batch: iter_time=1.344, forward_time=0.188, loss_ctc=69.328, loss_att=52.361, acc=0.722, loss=57.451, backward_time=1.039, grad_norm=118.642, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.185, optim0_lr0=5.418e-05, train_time=7.951 +[gpub002:0/64] 2023-07-13 12:50:41,794 (trainer:732) INFO: 44epoch:train:5901-6000batch: iter_time=1.069e-04, forward_time=0.145, loss_ctc=71.719, loss_att=54.368, acc=0.713, loss=59.573, backward_time=1.028, grad_norm=148.006, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.417e-05, train_time=2.733 +[gpub002:0/64] 2023-07-13 12:52:58,663 (trainer:732) INFO: 44epoch:train:6001-6100batch: iter_time=1.112e-04, forward_time=0.146, loss_ctc=82.110, loss_att=60.893, acc=0.716, loss=67.258, backward_time=1.032, grad_norm=162.632, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.417e-05, train_time=2.737 +[gpub002:0/64] 2023-07-13 12:55:14,739 
(trainer:732) INFO: 44epoch:train:6101-6200batch: iter_time=1.246e-04, forward_time=0.147, loss_ctc=76.069, loss_att=50.970, acc=0.722, loss=58.499, backward_time=1.030, grad_norm=127.718, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.416e-05, train_time=2.721 +[gpub002:0/64] 2023-07-13 12:57:31,152 (trainer:732) INFO: 44epoch:train:6201-6300batch: iter_time=1.203e-04, forward_time=0.147, loss_ctc=68.497, loss_att=51.883, acc=0.704, loss=56.867, backward_time=1.031, grad_norm=116.576, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.415e-05, train_time=2.728 +[gpub002:0/64] 2023-07-13 12:59:47,343 (trainer:732) INFO: 44epoch:train:6301-6400batch: iter_time=1.205e-04, forward_time=0.147, loss_ctc=79.729, loss_att=55.182, acc=0.711, loss=62.546, backward_time=1.030, grad_norm=126.405, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.415e-05, train_time=2.724 +[gpub002:0/64] 2023-07-13 13:02:03,500 (trainer:732) INFO: 44epoch:train:6401-6500batch: iter_time=1.254e-04, forward_time=0.147, loss_ctc=74.743, loss_att=55.259, acc=0.729, loss=61.104, backward_time=1.030, grad_norm=129.649, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.414e-05, train_time=2.723 +[gpub002:0/64] 2023-07-13 13:04:21,478 (trainer:732) INFO: 44epoch:train:6501-6600batch: iter_time=1.249e-04, forward_time=0.147, loss_ctc=72.287, loss_att=56.876, acc=0.707, loss=61.499, backward_time=1.035, grad_norm=121.228, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.414e-05, train_time=2.759 +[gpub002:0/64] 2023-07-13 13:06:07,952 (multiple_iter_factory:32) INFO: Building 8th iter-factory... +[gpub002:0/64] 2023-07-13 13:06:26,685 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-13 13:06:30,127 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-13 13:06:30,127 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8, +[gpub002:0/64] 2023-07-13 13:06:30,133 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-13 13:10:55,875 (trainer:732) INFO: 44epoch:train:6601-6700batch: iter_time=2.456, forward_time=0.155, loss_ctc=76.904, loss_att=57.906, acc=0.720, loss=63.606, backward_time=1.050, grad_norm=144.697, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.185, optim0_lr0=5.413e-05, train_time=7.888 +[gpub002:0/64] 2023-07-13 13:13:12,276 (trainer:732) INFO: 44epoch:train:6701-6800batch: iter_time=1.179e-04, forward_time=0.144, loss_ctc=71.936, loss_att=56.557, acc=0.703, loss=61.171, backward_time=1.027, grad_norm=116.798, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.412e-05, train_time=2.728 +[gpub002:0/64] 2023-07-13 13:15:28,808 (trainer:732) INFO: 44epoch:train:6801-6900batch: iter_time=1.220e-04, forward_time=0.145, loss_ctc=80.914, loss_att=61.465, acc=0.700, loss=67.299, backward_time=1.028, grad_norm=148.620, clip=100.000, 
loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.412e-05, train_time=2.730 +[gpub002:0/64] 2023-07-13 13:17:44,586 (trainer:732) INFO: 44epoch:train:6901-7000batch: iter_time=1.244e-04, forward_time=0.145, loss_ctc=77.724, loss_att=54.794, acc=0.706, loss=61.673, backward_time=1.027, grad_norm=112.450, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.411e-05, train_time=2.715 +[gpub002:0/64] 2023-07-13 13:20:00,070 (trainer:732) INFO: 44epoch:train:7001-7100batch: iter_time=1.287e-04, forward_time=0.144, loss_ctc=69.685, loss_att=51.830, acc=0.704, loss=57.186, backward_time=1.026, grad_norm=123.047, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.410e-05, train_time=2.709 +[gpub002:0/64] 2023-07-13 13:22:20,661 (trainer:732) INFO: 44epoch:train:7101-7200batch: iter_time=1.207e-04, forward_time=0.165, loss_ctc=81.406, loss_att=55.579, acc=0.704, loss=63.327, backward_time=1.031, grad_norm=171.513, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.410e-05, train_time=2.812 +[gpub002:0/64] 2023-07-13 13:24:36,675 (trainer:732) INFO: 44epoch:train:7201-7300batch: iter_time=1.127e-04, forward_time=0.145, loss_ctc=73.378, loss_att=57.476, acc=0.708, loss=62.246, backward_time=1.030, grad_norm=116.929, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.409e-05, train_time=2.720 +[gpub002:0/64] 2023-07-13 13:26:53,257 (trainer:732) INFO: 44epoch:train:7301-7400batch: iter_time=1.122e-04, forward_time=0.145, loss_ctc=72.420, loss_att=58.355, acc=0.694, loss=62.575, backward_time=1.029, grad_norm=124.267, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.408e-05, train_time=2.731 +[gpub002:0/64] 2023-07-13 13:29:10,199 (multiple_iter_factory:32) INFO: Building 9th iter-factory... 
+[gpub002:0/64] 2023-07-13 13:29:28,349 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-13 13:29:31,773 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-13 13:29:31,773 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11, +[gpub002:0/64] 2023-07-13 13:29:31,780 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-13 13:32:48,933 (trainer:732) INFO: 44epoch:train:7401-7500batch: iter_time=1.337, forward_time=0.199, loss_ctc=76.054, loss_att=55.067, acc=0.719, loss=61.363, backward_time=1.035, grad_norm=122.867, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.184, optim0_lr0=5.408e-05, train_time=7.113 +[gpub002:0/64] 2023-07-13 13:35:07,231 (trainer:732) INFO: 44epoch:train:7501-7600batch: iter_time=1.428e-04, forward_time=0.147, loss_ctc=72.133, loss_att=54.014, acc=0.717, loss=59.450, backward_time=1.036, grad_norm=138.467, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.407e-05, train_time=2.766 +[gpub002:0/64] 2023-07-13 13:37:23,964 (trainer:732) INFO: 44epoch:train:7601-7700batch: iter_time=9.887e-05, forward_time=0.145, loss_ctc=78.211, loss_att=59.906, acc=0.709, loss=65.397, backward_time=1.030, grad_norm=141.983, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.407e-05, train_time=2.734 +[gpub002:0/64] 2023-07-13 13:39:40,522 (trainer:732) INFO: 44epoch:train:7701-7800batch: iter_time=1.062e-04, forward_time=0.145, loss_ctc=81.364, loss_att=54.416, acc=0.730, loss=62.501, backward_time=1.028, grad_norm=135.340, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.406e-05, train_time=2.731 +[gpub002:0/64] 2023-07-13 13:41:55,986 (trainer:732) INFO: 44epoch:train:7801-7900batch: iter_time=1.134e-04, forward_time=0.144, loss_ctc=64.512, loss_att=47.472, acc=0.708, loss=52.584, backward_time=1.026, grad_norm=123.607, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.405e-05, train_time=2.709 +[gpub002:0/64] 2023-07-13 13:44:11,989 (trainer:732) INFO: 44epoch:train:7901-8000batch: iter_time=1.154e-04, forward_time=0.145, loss_ctc=77.259, loss_att=55.929, acc=0.709, loss=62.328, backward_time=1.029, grad_norm=131.598, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.405e-05, train_time=2.720 +[gpub002:0/64] 2023-07-13 13:46:37,906 (trainer:732) INFO: 44epoch:train:8001-8100batch: iter_time=1.120e-04, forward_time=0.145, loss_ctc=73.351, loss_att=53.304, acc=0.719, loss=59.318, backward_time=1.062, grad_norm=109.641, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.404e-05, train_time=2.918 +[gpub002:0/64] 2023-07-13 13:48:56,865 (trainer:732) INFO: 44epoch:train:8101-8200batch: iter_time=1.157e-04, forward_time=0.145, loss_ctc=74.136, loss_att=61.403, acc=0.701, loss=65.223, backward_time=1.035, grad_norm=118.234, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, 
optim0_lr0=5.403e-05, train_time=2.779 +[gpub002:0/64] 2023-07-13 13:51:17,363 (trainer:732) INFO: 44epoch:train:8201-8300batch: iter_time=1.169e-04, forward_time=0.145, loss_ctc=76.981, loss_att=57.043, acc=0.721, loss=63.025, backward_time=1.034, grad_norm=133.096, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.403e-05, train_time=2.810 +[gpub002:0/64] 2023-07-13 13:52:22,069 (multiple_iter_factory:32) INFO: Building 10th iter-factory... +[gpub002:0/64] 2023-07-13 13:52:40,279 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-13 13:52:43,910 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-13 13:52:43,910 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9, +[gpub002:0/64] 2023-07-13 13:52:43,916 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-13 13:59:20,241 (trainer:732) INFO: 44epoch:train:8301-8400batch: iter_time=2.263, forward_time=0.185, loss_ctc=68.303, loss_att=51.973, acc=0.725, loss=56.872, backward_time=1.048, grad_norm=114.275, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.184, optim0_lr0=5.402e-05, train_time=9.657 +[gpub002:0/64] 2023-07-13 14:01:37,614 (trainer:732) INFO: 44epoch:train:8401-8500batch: iter_time=9.752e-05, forward_time=0.144, loss_ctc=71.653, loss_att=56.730, acc=0.711, loss=61.207, backward_time=1.030, grad_norm=117.549, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.402e-05, train_time=2.748 +[gpub002:0/64] 2023-07-13 14:03:54,243 (trainer:732) INFO: 44epoch:train:8501-8600batch: iter_time=1.303e-04, forward_time=0.147, loss_ctc=84.550, loss_att=58.625, acc=0.720, loss=66.402, backward_time=1.031, grad_norm=124.781, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.401e-05, train_time=2.732 +[gpub002:0/64] 2023-07-13 14:06:11,819 (trainer:732) INFO: 44epoch:train:8601-8700batch: iter_time=1.454e-04, forward_time=0.146, loss_ctc=68.495, loss_att=47.376, acc=0.706, loss=53.712, backward_time=1.032, grad_norm=109.505, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.400e-05, train_time=2.751 +[gpub002:0/64] 2023-07-13 14:08:27,943 (trainer:732) INFO: 44epoch:train:8701-8800batch: iter_time=1.376e-04, forward_time=0.146, loss_ctc=74.817, loss_att=58.004, acc=0.704, loss=63.048, backward_time=1.031, grad_norm=116.827, clip=100.000, loss_scale=5.257e+32, optim_step_time=0.181, optim0_lr0=5.400e-05, train_time=2.722 +[gpub002:0/64] 2023-07-13 14:10:46,494 (trainer:732) INFO: 44epoch:train:8801-8900batch: iter_time=1.356e-04, forward_time=0.147, loss_ctc=78.590, loss_att=52.950, acc=0.730, loss=60.642, backward_time=1.032, grad_norm=116.926, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.399e-05, train_time=2.771 +[gpub002:0/64] 2023-07-13 14:13:06,393 (trainer:732) INFO: 44epoch:train:8901-9000batch: iter_time=1.178e-04, forward_time=0.148, loss_ctc=72.075, 
loss_att=54.319, acc=0.717, loss=59.646, backward_time=1.037, grad_norm=123.080, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.398e-05, train_time=2.798 +[gpub002:0/64] 2023-07-13 14:15:26,105 (trainer:732) INFO: 44epoch:train:9001-9100batch: iter_time=1.106e-04, forward_time=0.146, loss_ctc=79.231, loss_att=65.332, acc=0.701, loss=69.502, backward_time=1.047, grad_norm=112.101, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.398e-05, train_time=2.794 +[gpub002:0/64] 2023-07-13 14:17:01,910 (multiple_iter_factory:32) INFO: Building 11th iter-factory... +[gpub002:0/64] 2023-07-13 14:17:20,361 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-13 14:17:24,093 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-13 14:17:24,093 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6, +[gpub002:0/64] 2023-07-13 14:17:24,100 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-13 14:23:35,296 (trainer:732) INFO: 44epoch:train:9101-9200batch: iter_time=1.994, forward_time=0.152, loss_ctc=64.897, loss_att=47.438, acc=0.726, loss=52.676, backward_time=1.049, grad_norm=112.693, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.397e-05, train_time=9.784 +[gpub002:0/64] 2023-07-13 14:25:52,081 (trainer:732) INFO: 44epoch:train:9201-9300batch: iter_time=1.231e-04, forward_time=0.145, loss_ctc=69.610, loss_att=54.648, acc=0.710, loss=59.136, backward_time=1.028, grad_norm=111.652, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.181, optim0_lr0=5.396e-05, train_time=2.735 +[gpub002:0/64] 2023-07-13 14:28:09,358 (trainer:732) INFO: 44epoch:train:9301-9400batch: iter_time=1.312e-04, forward_time=0.145, loss_ctc=73.827, loss_att=60.050, acc=0.696, loss=64.183, backward_time=1.027, grad_norm=114.849, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.396e-05, train_time=2.745 +[gpub002:0/64] 2023-07-13 14:30:25,352 (trainer:732) INFO: 44epoch:train:9401-9500batch: iter_time=1.301e-04, forward_time=0.145, loss_ctc=84.509, loss_att=55.869, acc=0.724, loss=64.461, backward_time=1.028, grad_norm=134.698, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.395e-05, train_time=2.720 +[gpub002:0/64] 2023-07-13 14:32:41,493 (trainer:732) INFO: 44epoch:train:9501-9600batch: iter_time=1.238e-04, forward_time=0.146, loss_ctc=65.351, loss_att=46.905, acc=0.708, loss=52.439, backward_time=1.027, grad_norm=114.409, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.181, optim0_lr0=5.395e-05, train_time=2.723 +[gpub002:0/64] 2023-07-13 14:34:57,531 (trainer:732) INFO: 44epoch:train:9601-9700batch: iter_time=1.233e-04, forward_time=0.146, loss_ctc=79.646, loss_att=58.448, acc=0.702, loss=64.808, backward_time=1.030, grad_norm=123.060, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.394e-05, train_time=2.721 +[gpub002:0/64] 
2023-07-13 14:37:13,895 (trainer:732) INFO: 44epoch:train:9701-9800batch: iter_time=1.296e-04, forward_time=0.146, loss_ctc=74.065, loss_att=51.749, acc=0.719, loss=58.444, backward_time=1.028, grad_norm=130.614, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.393e-05, train_time=2.727 +[gpub002:0/64] 2023-07-13 14:39:29,723 (trainer:732) INFO: 44epoch:train:9801-9900batch: iter_time=1.294e-04, forward_time=0.146, loss_ctc=75.390, loss_att=60.899, acc=0.695, loss=65.246, backward_time=1.028, grad_norm=124.549, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.393e-05, train_time=2.716 +[gpub002:0/64] 2023-07-13 14:41:45,627 (trainer:732) INFO: 44epoch:train:9901-10000batch: iter_time=1.350e-04, forward_time=0.147, loss_ctc=75.203, loss_att=57.881, acc=0.713, loss=63.078, backward_time=1.029, grad_norm=127.418, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.392e-05, train_time=2.718 +[gpub002:0/64] 2023-07-13 14:54:13,290 (trainer:338) INFO: 44epoch results: [train] iter_time=0.229, forward_time=0.148, loss_ctc=75.300, loss_att=56.099, acc=0.709, loss=61.860, backward_time=1.033, grad_norm=128.444, clip=100.000, loss_scale=2.826e+32, optim_step_time=0.182, optim0_lr0=5.423e-05, train_time=3.485, time=4 hours, 50 minutes and 36.87 seconds, total_count=410000, gpu_max_cached_mem_GB=37.574, [valid] loss_ctc=43.588, cer_ctc=0.256, loss_att=36.048, acc=0.706, cer=0.321, wer=0.984, loss=38.310, time=6 minutes and 1.21 seconds, total_count=41998, gpu_max_cached_mem_GB=37.574, [att_plot] time=6 minutes and 14.4 seconds, total_count=0, gpu_max_cached_mem_GB=37.574 +[gpub002:0/64] 2023-07-13 14:54:29,899 (trainer:386) INFO: The best model has been updated: valid.acc, valid.total_count +[gpub002:0/64] 2023-07-13 14:54:29,920 (trainer:440) INFO: The model files were removed: exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/37epoch.pth, exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/39epoch.pth +[gpub002:0/64] 2023-07-13 14:54:29,921 (trainer:272) INFO: 45/50epoch started. Estimated time to finish: 1 day, 6 hours and 41 minutes +[gpub002:0/64] 2023-07-13 14:54:30,027 (multiple_iter_factory:32) INFO: Building 0th iter-factory... 
+[gpub002:0/64] 2023-07-13 14:54:47,960 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-13 14:54:52,321 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-13 14:54:52,321 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9, +[gpub002:0/64] 2023-07-13 14:54:52,372 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-13 15:01:36,666 (trainer:732) INFO: 45epoch:train:1-100batch: iter_time=2.837, forward_time=0.166, loss_ctc=81.665, loss_att=63.090, acc=0.699, loss=68.662, backward_time=1.044, grad_norm=124.602, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.186, optim0_lr0=5.391e-05, train_time=8.533 +[gpub002:0/64] 2023-07-13 15:03:34,215 (trainer:663) WARNING: The grad norm is nan. Skipping updating the model. +[gpub002:0/64] 2023-07-13 15:03:58,918 (trainer:732) INFO: 45epoch:train:101-200batch: iter_time=1.454e-04, forward_time=0.167, loss_ctc=78.164, loss_att=54.166, acc=0.720, loss=61.365, backward_time=1.033, grad_norm=145.604, clip=100.000, loss_scale=5.894e+32, optim_step_time=0.183, optim0_lr0=5.391e-05, train_time=2.845 +[gpub002:0/64] 2023-07-13 15:06:25,567 (trainer:732) INFO: 45epoch:train:201-300batch: iter_time=1.205e-04, forward_time=0.151, loss_ctc=68.261, loss_att=52.387, acc=0.704, loss=57.149, backward_time=1.044, grad_norm=106.301, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.390e-05, train_time=2.933 +[gpub002:0/64] 2023-07-13 15:08:51,283 (trainer:732) INFO: 45epoch:train:301-400batch: iter_time=1.249e-04, forward_time=0.151, loss_ctc=66.533, loss_att=51.447, acc=0.717, loss=55.973, backward_time=1.039, grad_norm=108.528, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.390e-05, train_time=2.914 +[gpub002:0/64] 2023-07-13 15:11:15,915 (trainer:732) INFO: 45epoch:train:401-500batch: iter_time=1.270e-04, forward_time=0.162, loss_ctc=75.336, loss_att=58.055, acc=0.709, loss=63.240, backward_time=1.031, grad_norm=122.397, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.389e-05, train_time=2.892 +[gpub002:0/64] 2023-07-13 15:13:39,370 (trainer:732) INFO: 45epoch:train:501-600batch: iter_time=1.190e-04, forward_time=0.175, loss_ctc=80.003, loss_att=63.160, acc=0.708, loss=68.213, backward_time=1.047, grad_norm=123.534, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.187, optim0_lr0=5.388e-05, train_time=2.868 +[gpub002:0/64] 2023-07-13 15:16:05,037 (trainer:732) INFO: 45epoch:train:601-700batch: iter_time=1.220e-04, forward_time=0.160, loss_ctc=66.333, loss_att=48.550, acc=0.711, loss=53.885, backward_time=1.041, grad_norm=123.517, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.388e-05, train_time=2.914 +[gpub002:0/64] 2023-07-13 15:18:34,274 (trainer:732) INFO: 45epoch:train:701-800batch: iter_time=1.331e-04, forward_time=0.169, loss_ctc=70.528, loss_att=52.616, acc=0.702, loss=57.989, 
backward_time=1.059, grad_norm=106.529, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.189, optim0_lr0=5.387e-05, train_time=2.985 +[gpub002:0/64] 2023-07-13 15:19:38,148 (multiple_iter_factory:32) INFO: Building 1th iter-factory... +[gpub002:0/64] 2023-07-13 15:19:56,052 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-13 15:19:59,706 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-13 15:19:59,706 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1, +[gpub002:0/64] 2023-07-13 15:19:59,712 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-13 15:25:05,470 (trainer:732) INFO: 45epoch:train:801-900batch: iter_time=1.925, forward_time=0.196, loss_ctc=70.359, loss_att=50.574, acc=0.717, loss=56.510, backward_time=1.041, grad_norm=156.159, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.189, optim0_lr0=5.386e-05, train_time=7.823 +[gpub002:0/64] 2023-07-13 15:27:22,701 (trainer:732) INFO: 45epoch:train:901-1000batch: iter_time=1.363e-04, forward_time=0.147, loss_ctc=79.920, loss_att=62.286, acc=0.707, loss=67.576, backward_time=1.033, grad_norm=133.899, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.386e-05, train_time=2.745 +[gpub002:0/64] 2023-07-13 15:29:38,869 (trainer:732) INFO: 45epoch:train:1001-1100batch: iter_time=1.411e-04, forward_time=0.147, loss_ctc=73.445, loss_att=53.330, acc=0.704, loss=59.364, backward_time=1.030, grad_norm=132.822, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.385e-05, train_time=2.723 +[gpub002:0/64] 2023-07-13 15:31:56,763 (trainer:732) INFO: 45epoch:train:1101-1200batch: iter_time=4.801e-04, forward_time=0.146, loss_ctc=71.514, loss_att=56.740, acc=0.723, loss=61.172, backward_time=1.034, grad_norm=142.017, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.189, optim0_lr0=5.385e-05, train_time=2.758 +[gpub002:0/64] 2023-07-13 15:34:12,180 (trainer:732) INFO: 45epoch:train:1201-1300batch: iter_time=9.975e-05, forward_time=0.143, loss_ctc=64.208, loss_att=48.306, acc=0.716, loss=53.077, backward_time=1.025, grad_norm=147.537, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.384e-05, train_time=2.708 +[gpub002:0/64] 2023-07-13 15:36:49,716 (trainer:732) INFO: 45epoch:train:1301-1400batch: iter_time=4.440e-04, forward_time=0.316, loss_ctc=76.597, loss_att=58.015, acc=0.710, loss=63.590, backward_time=1.059, grad_norm=118.802, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.189, optim0_lr0=5.383e-05, train_time=3.150 +[gpub002:0/64] 2023-07-13 15:39:05,648 (trainer:732) INFO: 45epoch:train:1401-1500batch: iter_time=1.019e-04, forward_time=0.145, loss_ctc=75.275, loss_att=58.906, acc=0.714, loss=63.817, backward_time=1.028, grad_norm=121.195, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.383e-05, train_time=2.719 +[gpub002:0/64] 2023-07-13 15:41:21,653 (trainer:732) INFO: 
45epoch:train:1501-1600batch: iter_time=1.027e-04, forward_time=0.145, loss_ctc=67.840, loss_att=50.503, acc=0.699, loss=55.704, backward_time=1.028, grad_norm=110.323, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.382e-05, train_time=2.719 +[gpub002:0/64] 2023-07-13 15:43:03,648 (multiple_iter_factory:32) INFO: Building 2th iter-factory... +[gpub002:0/64] 2023-07-13 15:43:21,668 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-13 15:43:25,353 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-13 15:43:25,353 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11, +[gpub002:0/64] 2023-07-13 15:43:25,360 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-13 15:47:55,266 (trainer:732) INFO: 45epoch:train:1601-1700batch: iter_time=2.508, forward_time=0.175, loss_ctc=74.468, loss_att=56.404, acc=0.712, loss=61.823, backward_time=1.040, grad_norm=106.781, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.186, optim0_lr0=5.381e-05, train_time=7.873 +[gpub002:0/64] 2023-07-13 15:50:12,061 (trainer:732) INFO: 45epoch:train:1701-1800batch: iter_time=1.176e-04, forward_time=0.148, loss_ctc=71.768, loss_att=53.722, acc=0.718, loss=59.136, backward_time=1.032, grad_norm=124.090, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.381e-05, train_time=2.736 +[gpub002:0/64] 2023-07-13 15:52:28,137 (trainer:732) INFO: 45epoch:train:1801-1900batch: iter_time=1.253e-04, forward_time=0.146, loss_ctc=73.393, loss_att=54.484, acc=0.706, loss=60.157, backward_time=1.028, grad_norm=122.871, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.380e-05, train_time=2.721 +[gpub002:0/64] 2023-07-13 15:54:44,035 (trainer:732) INFO: 45epoch:train:1901-2000batch: iter_time=1.209e-04, forward_time=0.145, loss_ctc=72.218, loss_att=56.883, acc=0.716, loss=61.484, backward_time=1.029, grad_norm=99.995, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.380e-05, train_time=2.718 +[gpub002:0/64] 2023-07-13 15:56:59,596 (trainer:732) INFO: 45epoch:train:2001-2100batch: iter_time=1.169e-04, forward_time=0.145, loss_ctc=64.329, loss_att=47.171, acc=0.728, loss=52.319, backward_time=1.027, grad_norm=106.538, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.379e-05, train_time=2.711 +[gpub002:0/64] 2023-07-13 15:59:15,764 (trainer:732) INFO: 45epoch:train:2101-2200batch: iter_time=1.161e-04, forward_time=0.147, loss_ctc=78.537, loss_att=63.446, acc=0.702, loss=67.973, backward_time=1.030, grad_norm=116.976, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.378e-05, train_time=2.723 +[gpub002:0/64] 2023-07-13 16:01:31,536 (trainer:732) INFO: 45epoch:train:2201-2300batch: iter_time=1.193e-04, forward_time=0.146, loss_ctc=72.395, loss_att=52.310, acc=0.716, loss=58.335, backward_time=1.027, grad_norm=121.900, clip=100.000, 
loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.378e-05, train_time=2.714 +[gpub002:0/64] 2023-07-13 16:04:07,432 (trainer:732) INFO: 45epoch:train:2301-2400batch: iter_time=4.133e-04, forward_time=0.291, loss_ctc=64.940, loss_att=49.301, acc=0.698, loss=53.992, backward_time=1.048, grad_norm=119.660, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.195, optim0_lr0=5.377e-05, train_time=3.119 +[gpub002:0/64] 2023-07-13 16:06:26,316 (trainer:732) INFO: 45epoch:train:2401-2500batch: iter_time=1.060e-04, forward_time=0.146, loss_ctc=66.773, loss_att=49.837, acc=0.725, loss=54.918, backward_time=1.033, grad_norm=113.051, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.376e-05, train_time=2.778 +[gpub002:0/64] 2023-07-13 16:06:47,569 (multiple_iter_factory:32) INFO: Building 3th iter-factory... +[gpub002:0/64] 2023-07-13 16:07:05,636 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-13 16:07:09,245 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-13 16:07:09,246 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5, +[gpub002:0/64] 2023-07-13 16:07:09,252 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-13 16:13:45,255 (trainer:732) INFO: 45epoch:train:2501-2600batch: iter_time=2.932, forward_time=0.146, loss_ctc=75.245, loss_att=59.285, acc=0.705, loss=64.073, backward_time=1.045, grad_norm=143.838, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.376e-05, train_time=8.779 +[gpub002:0/64] 2023-07-13 16:16:02,493 (trainer:732) INFO: 45epoch:train:2601-2700batch: iter_time=1.125e-04, forward_time=0.145, loss_ctc=71.169, loss_att=50.380, acc=0.721, loss=56.617, backward_time=1.031, grad_norm=126.623, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.375e-05, train_time=2.745 +[gpub002:0/64] 2023-07-13 16:18:18,405 (trainer:732) INFO: 45epoch:train:2701-2800batch: iter_time=1.474e-04, forward_time=0.146, loss_ctc=72.864, loss_att=55.984, acc=0.701, loss=61.048, backward_time=1.030, grad_norm=110.866, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.375e-05, train_time=2.718 +[gpub002:0/64] 2023-07-13 16:20:34,197 (trainer:732) INFO: 45epoch:train:2801-2900batch: iter_time=1.338e-04, forward_time=0.146, loss_ctc=71.354, loss_att=54.781, acc=0.724, loss=59.753, backward_time=1.029, grad_norm=114.172, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.374e-05, train_time=2.716 +[gpub002:0/64] 2023-07-13 16:22:50,091 (trainer:732) INFO: 45epoch:train:2901-3000batch: iter_time=1.232e-04, forward_time=0.146, loss_ctc=63.704, loss_att=47.879, acc=0.721, loss=52.626, backward_time=1.031, grad_norm=113.241, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.373e-05, train_time=2.718 +[gpub002:0/64] 2023-07-13 16:25:08,892 (trainer:732) INFO: 45epoch:train:3001-3100batch: iter_time=1.287e-04, 
forward_time=0.146, loss_ctc=80.531, loss_att=66.219, acc=0.702, loss=70.512, backward_time=1.031, grad_norm=129.615, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.373e-05, train_time=2.776 +[gpub002:0/64] 2023-07-13 16:27:26,710 (trainer:732) INFO: 45epoch:train:3101-3200batch: iter_time=1.241e-04, forward_time=0.146, loss_ctc=74.038, loss_att=52.789, acc=0.722, loss=59.164, backward_time=1.033, grad_norm=120.734, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.372e-05, train_time=2.756 +[gpub002:0/64] 2023-07-13 16:29:45,474 (trainer:732) INFO: 45epoch:train:3201-3300batch: iter_time=1.212e-04, forward_time=0.145, loss_ctc=64.774, loss_att=50.047, acc=0.692, loss=54.465, backward_time=1.031, grad_norm=159.741, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.372e-05, train_time=2.775 +[gpub002:0/64] 2023-07-13 16:30:34,615 (multiple_iter_factory:32) INFO: Building 4th iter-factory... +[gpub002:0/64] 2023-07-13 16:30:52,965 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-13 16:30:56,648 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-13 16:30:56,648 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10, +[gpub002:0/64] 2023-07-13 16:30:56,655 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-13 16:36:51,770 (trainer:732) INFO: 45epoch:train:3301-3400batch: iter_time=1.357, forward_time=0.191, loss_ctc=74.879, loss_att=59.904, acc=0.707, loss=64.396, backward_time=1.042, grad_norm=130.493, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.184, optim0_lr0=5.371e-05, train_time=8.525 +[gpub002:0/64] 2023-07-13 16:39:07,620 (trainer:732) INFO: 45epoch:train:3401-3500batch: iter_time=1.165e-04, forward_time=0.145, loss_ctc=72.274, loss_att=48.909, acc=0.725, loss=55.919, backward_time=1.029, grad_norm=119.444, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.370e-05, train_time=2.718 +[gpub002:0/64] 2023-07-13 16:41:23,193 (trainer:732) INFO: 45epoch:train:3501-3600batch: iter_time=1.335e-04, forward_time=0.146, loss_ctc=73.254, loss_att=56.523, acc=0.710, loss=61.542, backward_time=1.027, grad_norm=136.299, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.370e-05, train_time=2.711 +[gpub002:0/64] 2023-07-13 16:43:38,990 (trainer:732) INFO: 45epoch:train:3601-3700batch: iter_time=1.194e-04, forward_time=0.146, loss_ctc=68.969, loss_att=55.099, acc=0.704, loss=59.260, backward_time=1.029, grad_norm=118.124, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.369e-05, train_time=2.716 +[gpub002:0/64] 2023-07-13 16:45:54,638 (trainer:732) INFO: 45epoch:train:3701-3800batch: iter_time=1.180e-04, forward_time=0.145, loss_ctc=67.300, loss_att=51.250, acc=0.710, loss=56.065, backward_time=1.027, grad_norm=122.728, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, 
optim0_lr0=5.368e-05, train_time=2.713 +[gpub002:0/64] 2023-07-13 16:48:11,353 (trainer:732) INFO: 45epoch:train:3801-3900batch: iter_time=1.176e-04, forward_time=0.147, loss_ctc=79.048, loss_att=65.393, acc=0.693, loss=69.490, backward_time=1.029, grad_norm=138.605, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.368e-05, train_time=2.734 +[gpub002:0/64] 2023-07-13 16:50:30,597 (trainer:732) INFO: 45epoch:train:3901-4000batch: iter_time=1.266e-04, forward_time=0.166, loss_ctc=67.808, loss_att=48.262, acc=0.723, loss=54.126, backward_time=1.032, grad_norm=118.096, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.367e-05, train_time=2.785 +[gpub002:0/64] 2023-07-13 16:52:46,308 (trainer:732) INFO: 45epoch:train:4001-4100batch: iter_time=1.301e-04, forward_time=0.146, loss_ctc=66.270, loss_att=49.916, acc=0.699, loss=54.822, backward_time=1.029, grad_norm=112.093, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.367e-05, train_time=2.714 +[gpub002:0/64] 2023-07-13 16:54:21,064 (multiple_iter_factory:32) INFO: Building 5th iter-factory... +[gpub002:0/64] 2023-07-13 16:54:39,212 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-13 16:54:42,631 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-13 16:54:42,631 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3, +[gpub002:0/64] 2023-07-13 16:54:42,637 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-13 17:00:13,505 (trainer:732) INFO: 45epoch:train:4101-4200batch: iter_time=1.377, forward_time=0.185, loss_ctc=69.143, loss_att=51.631, acc=0.718, loss=56.885, backward_time=1.038, grad_norm=123.088, clip=100.000, loss_scale=3.829e+32, optim_step_time=0.184, optim0_lr0=5.366e-05, train_time=8.944 +[gpub002:0/64] 2023-07-13 17:02:31,518 (trainer:732) INFO: 45epoch:train:4201-4300batch: iter_time=1.013e-04, forward_time=0.147, loss_ctc=78.116, loss_att=61.080, acc=0.702, loss=66.191, backward_time=1.036, grad_norm=135.639, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.183, optim0_lr0=5.365e-05, train_time=2.760 +[gpub002:0/64] 2023-07-13 17:04:47,316 (trainer:732) INFO: 45epoch:train:4301-4400batch: iter_time=1.076e-04, forward_time=0.144, loss_ctc=73.863, loss_att=48.660, acc=0.731, loss=56.221, backward_time=1.028, grad_norm=111.515, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.365e-05, train_time=2.716 +[gpub002:0/64] 2023-07-13 17:07:04,029 (trainer:732) INFO: 45epoch:train:4401-4500batch: iter_time=1.058e-04, forward_time=0.146, loss_ctc=69.350, loss_att=55.129, acc=0.715, loss=59.396, backward_time=1.031, grad_norm=110.270, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.364e-05, train_time=2.734 +[gpub002:0/64] 2023-07-13 17:09:30,922 (trainer:732) INFO: 45epoch:train:4501-4600batch: iter_time=1.035e-04, forward_time=0.146, loss_ctc=64.703, 
loss_att=49.097, acc=0.721, loss=53.779, backward_time=1.044, grad_norm=107.915, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.363e-05, train_time=2.938 +[gpub002:0/64] 2023-07-13 17:12:18,786 (trainer:732) INFO: 45epoch:train:4601-4700batch: iter_time=1.141e-04, forward_time=0.145, loss_ctc=73.234, loss_att=56.968, acc=0.715, loss=61.847, backward_time=1.061, grad_norm=124.928, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.363e-05, train_time=3.357 +[gpub002:0/64] 2023-07-13 17:14:35,194 (trainer:732) INFO: 45epoch:train:4701-4800batch: iter_time=1.380e-04, forward_time=0.148, loss_ctc=76.055, loss_att=59.898, acc=0.712, loss=64.745, backward_time=1.032, grad_norm=120.870, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.362e-05, train_time=2.728 +[gpub002:0/64] 2023-07-13 17:16:50,801 (trainer:732) INFO: 45epoch:train:4801-4900batch: iter_time=1.407e-04, forward_time=0.146, loss_ctc=62.951, loss_att=45.591, acc=0.716, loss=50.799, backward_time=1.027, grad_norm=117.396, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.362e-05, train_time=2.712 +[gpub002:0/64] 2023-07-13 17:19:06,572 (trainer:732) INFO: 45epoch:train:4901-5000batch: iter_time=1.073e-04, forward_time=0.145, loss_ctc=68.808, loss_att=51.751, acc=0.704, loss=56.868, backward_time=1.028, grad_norm=136.363, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.361e-05, train_time=2.715 +[gpub002:0/64] 2023-07-13 17:19:21,389 (multiple_iter_factory:32) INFO: Building 6th iter-factory... +[gpub002:0/64] 2023-07-13 17:19:39,673 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-13 17:19:43,090 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-13 17:19:43,090 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2, +[gpub002:0/64] 2023-07-13 17:19:43,096 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-13 17:24:59,179 (trainer:732) INFO: 45epoch:train:5001-5100batch: iter_time=2.009, forward_time=0.147, loss_ctc=73.429, loss_att=58.346, acc=0.701, loss=62.871, backward_time=1.048, grad_norm=125.174, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.360e-05, train_time=7.052 +[gpub002:0/64] 2023-07-13 17:27:16,621 (trainer:732) INFO: 45epoch:train:5101-5200batch: iter_time=1.144e-04, forward_time=0.145, loss_ctc=69.877, loss_att=49.329, acc=0.732, loss=55.493, backward_time=1.030, grad_norm=121.180, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.360e-05, train_time=2.749 +[gpub002:0/64] 2023-07-13 17:29:32,165 (trainer:732) INFO: 45epoch:train:5201-5300batch: iter_time=1.158e-04, forward_time=0.145, loss_ctc=72.769, loss_att=55.528, acc=0.703, loss=60.701, backward_time=1.026, grad_norm=161.988, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.359e-05, train_time=2.711 +[gpub002:0/64] 
2023-07-13 17:31:47,590 (trainer:732) INFO: 45epoch:train:5301-5400batch: iter_time=1.207e-04, forward_time=0.144, loss_ctc=71.449, loss_att=55.740, acc=0.713, loss=60.453, backward_time=1.025, grad_norm=134.067, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.359e-05, train_time=2.708 +[gpub002:0/64] 2023-07-13 17:34:03,500 (trainer:732) INFO: 45epoch:train:5401-5500batch: iter_time=1.115e-04, forward_time=0.145, loss_ctc=63.744, loss_att=47.642, acc=0.716, loss=52.473, backward_time=1.028, grad_norm=120.630, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.358e-05, train_time=2.718 +[gpub002:0/64] 2023-07-13 17:36:19,787 (trainer:732) INFO: 45epoch:train:5501-5600batch: iter_time=1.088e-04, forward_time=0.145, loss_ctc=80.084, loss_att=68.051, acc=0.690, loss=71.661, backward_time=1.028, grad_norm=136.204, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.357e-05, train_time=2.726 +[gpub002:0/64] 2023-07-13 17:38:35,468 (trainer:732) INFO: 45epoch:train:5601-5700batch: iter_time=1.136e-04, forward_time=0.145, loss_ctc=73.364, loss_att=52.280, acc=0.718, loss=58.605, backward_time=1.026, grad_norm=133.967, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.357e-05, train_time=2.713 +[gpub002:0/64] 2023-07-13 17:40:50,747 (trainer:732) INFO: 45epoch:train:5701-5800batch: iter_time=1.299e-04, forward_time=0.145, loss_ctc=64.239, loss_att=50.021, acc=0.693, loss=54.286, backward_time=1.026, grad_norm=113.559, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.183, optim0_lr0=5.356e-05, train_time=2.705 +[gpub002:0/64] 2023-07-13 17:41:39,369 (multiple_iter_factory:32) INFO: Building 7th iter-factory... +[gpub002:0/64] 2023-07-13 17:41:57,470 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-13 17:42:01,065 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-13 17:42:01,065 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7, +[gpub002:0/64] 2023-07-13 17:42:01,071 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-13 17:49:41,785 (trainer:732) INFO: 45epoch:train:5801-5900batch: iter_time=1.389, forward_time=0.199, loss_ctc=66.566, loss_att=47.542, acc=0.725, loss=53.249, backward_time=1.047, grad_norm=123.650, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.185, optim0_lr0=5.355e-05, train_time=10.620 +[gpub002:0/64] 2023-07-13 17:51:58,518 (trainer:732) INFO: 45epoch:train:5901-6000batch: iter_time=1.269e-04, forward_time=0.148, loss_ctc=76.867, loss_att=60.070, acc=0.716, loss=65.109, backward_time=1.030, grad_norm=146.171, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.355e-05, train_time=2.735 +[gpub002:0/64] 2023-07-13 17:54:14,647 (trainer:732) INFO: 45epoch:train:6001-6100batch: iter_time=1.264e-04, forward_time=0.146, loss_ctc=73.164, loss_att=52.146, acc=0.712, loss=58.451, backward_time=1.031, 
grad_norm=118.081, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.354e-05, train_time=2.722 +[gpub002:0/64] 2023-07-13 17:56:25,548 (trainer:663) WARNING: The grad norm is nan. Skipping updating the model. +[gpub002:0/64] 2023-07-13 17:56:30,992 (trainer:732) INFO: 45epoch:train:6101-6200batch: iter_time=1.360e-04, forward_time=0.147, loss_ctc=72.297, loss_att=55.985, acc=0.727, loss=60.878, backward_time=1.033, grad_norm=127.067, clip=100.000, loss_scale=6.358e+32, optim_step_time=0.182, optim0_lr0=5.354e-05, train_time=2.727 +[gpub002:0/64] 2023-07-13 17:58:48,413 (trainer:732) INFO: 45epoch:train:6201-6300batch: iter_time=1.208e-04, forward_time=0.146, loss_ctc=62.624, loss_att=46.194, acc=0.728, loss=51.123, backward_time=1.042, grad_norm=124.806, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.353e-05, train_time=2.748 +[gpub002:0/64] 2023-07-13 18:01:05,345 (trainer:732) INFO: 45epoch:train:6301-6400batch: iter_time=9.910e-05, forward_time=0.146, loss_ctc=75.500, loss_att=57.283, acc=0.714, loss=62.749, backward_time=1.031, grad_norm=118.320, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.352e-05, train_time=2.738 +[gpub002:0/64] 2023-07-13 18:03:22,316 (trainer:732) INFO: 45epoch:train:6401-6500batch: iter_time=1.146e-04, forward_time=0.145, loss_ctc=70.999, loss_att=56.502, acc=0.721, loss=60.851, backward_time=1.031, grad_norm=119.958, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.352e-05, train_time=2.739 +[gpub002:0/64] 2023-07-13 18:05:38,177 (trainer:732) INFO: 45epoch:train:6501-6600batch: iter_time=1.006e-04, forward_time=0.144, loss_ctc=66.854, loss_att=49.259, acc=0.703, loss=54.538, backward_time=1.027, grad_norm=102.459, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.351e-05, train_time=2.717 +[gpub002:0/64] 2023-07-13 18:07:21,216 (multiple_iter_factory:32) INFO: Building 8th iter-factory... 
+[gpub002:0/64] 2023-07-13 18:07:39,658 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-13 18:07:43,082 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-13 18:07:43,082 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8, +[gpub002:0/64] 2023-07-13 18:07:43,088 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-13 18:12:41,336 (trainer:732) INFO: 45epoch:train:6601-6700batch: iter_time=1.633, forward_time=0.145, loss_ctc=73.582, loss_att=56.482, acc=0.708, loss=61.612, backward_time=1.040, grad_norm=136.038, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.351e-05, train_time=8.463 +[gpub002:0/64] 2023-07-13 18:14:58,720 (trainer:732) INFO: 45epoch:train:6701-6800batch: iter_time=1.127e-04, forward_time=0.145, loss_ctc=70.722, loss_att=52.661, acc=0.720, loss=58.080, backward_time=1.033, grad_norm=127.003, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.350e-05, train_time=2.747 +[gpub002:0/64] 2023-07-13 18:17:17,712 (trainer:732) INFO: 45epoch:train:6801-6900batch: iter_time=1.159e-04, forward_time=0.145, loss_ctc=73.147, loss_att=53.137, acc=0.710, loss=59.140, backward_time=1.047, grad_norm=130.410, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.349e-05, train_time=2.780 +[gpub002:0/64] 2023-07-13 18:19:40,571 (trainer:732) INFO: 45epoch:train:6901-7000batch: iter_time=1.008e-04, forward_time=0.145, loss_ctc=70.596, loss_att=56.317, acc=0.715, loss=60.601, backward_time=1.032, grad_norm=138.486, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.349e-05, train_time=2.857 +[gpub002:0/64] 2023-07-13 18:22:00,623 (trainer:732) INFO: 45epoch:train:7001-7100batch: iter_time=1.223e-04, forward_time=0.146, loss_ctc=63.705, loss_att=46.602, acc=0.721, loss=51.733, backward_time=1.033, grad_norm=101.592, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.348e-05, train_time=2.801 +[gpub002:0/64] 2023-07-13 18:24:43,695 (trainer:732) INFO: 45epoch:train:7101-7200batch: iter_time=1.209e-04, forward_time=0.145, loss_ctc=78.651, loss_att=65.443, acc=0.692, loss=69.405, backward_time=1.056, grad_norm=153.875, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.348e-05, train_time=3.261 +[gpub002:0/64] 2023-07-13 18:27:00,050 (trainer:732) INFO: 45epoch:train:7201-7300batch: iter_time=1.117e-04, forward_time=0.146, loss_ctc=69.650, loss_att=51.477, acc=0.714, loss=56.929, backward_time=1.030, grad_norm=126.353, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.347e-05, train_time=2.727 +[gpub002:0/64] 2023-07-13 18:29:15,908 (trainer:732) INFO: 45epoch:train:7301-7400batch: iter_time=1.172e-04, forward_time=0.145, loss_ctc=64.906, loss_att=48.559, acc=0.703, loss=53.463, backward_time=1.030, grad_norm=131.884, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, 
optim0_lr0=5.346e-05, train_time=2.717 +[gpub002:0/64] 2023-07-13 18:31:31,376 (trainer:732) INFO: 45epoch:train:7401-7500batch: iter_time=1.120e-04, forward_time=0.145, loss_ctc=66.285, loss_att=49.804, acc=0.719, loss=54.748, backward_time=1.027, grad_norm=119.238, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.346e-05, train_time=2.709 +[gpub002:0/64] 2023-07-13 18:31:39,809 (multiple_iter_factory:32) INFO: Building 9th iter-factory... +[gpub002:0/64] 2023-07-13 18:31:58,133 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-13 18:32:01,554 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-13 18:32:01,554 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4, +[gpub002:0/64] 2023-07-13 18:32:01,560 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-13 18:38:55,570 (trainer:732) INFO: 45epoch:train:7501-7600batch: iter_time=2.968, forward_time=0.205, loss_ctc=74.821, loss_att=58.310, acc=0.701, loss=63.263, backward_time=1.049, grad_norm=140.580, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.185, optim0_lr0=5.345e-05, train_time=8.883 +[gpub002:0/64] 2023-07-13 18:41:12,994 (trainer:732) INFO: 45epoch:train:7601-7700batch: iter_time=1.401e-04, forward_time=0.146, loss_ctc=70.499, loss_att=49.199, acc=0.733, loss=55.589, backward_time=1.030, grad_norm=115.910, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.344e-05, train_time=2.749 +[gpub002:0/64] 2023-07-13 18:43:29,468 (trainer:732) INFO: 45epoch:train:7701-7800batch: iter_time=1.289e-04, forward_time=0.146, loss_ctc=72.586, loss_att=55.027, acc=0.709, loss=60.295, backward_time=1.027, grad_norm=112.394, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.344e-05, train_time=2.729 +[gpub002:0/64] 2023-07-13 18:45:45,793 (trainer:732) INFO: 45epoch:train:7801-7900batch: iter_time=1.293e-04, forward_time=0.144, loss_ctc=72.517, loss_att=54.985, acc=0.716, loss=60.244, backward_time=1.025, grad_norm=117.890, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.343e-05, train_time=2.726 +[gpub002:0/64] 2023-07-13 18:48:24,077 (trainer:732) INFO: 45epoch:train:7901-8000batch: iter_time=1.442e-04, forward_time=0.145, loss_ctc=63.111, loss_att=46.756, acc=0.716, loss=51.662, backward_time=1.057, grad_norm=120.211, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.343e-05, train_time=3.165 +[gpub002:0/64] 2023-07-13 18:50:40,521 (trainer:732) INFO: 45epoch:train:8001-8100batch: iter_time=1.278e-04, forward_time=0.147, loss_ctc=78.732, loss_att=66.222, acc=0.695, loss=69.975, backward_time=1.031, grad_norm=170.247, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.342e-05, train_time=2.729 +[gpub002:0/64] 2023-07-13 18:52:56,982 (trainer:732) INFO: 45epoch:train:8101-8200batch: iter_time=1.082e-04, forward_time=0.146, loss_ctc=71.237, 
loss_att=50.828, acc=0.721, loss=56.951, backward_time=1.031, grad_norm=144.493, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.341e-05, train_time=2.729 +[gpub002:0/64] 2023-07-13 18:55:12,607 (trainer:732) INFO: 45epoch:train:8201-8300batch: iter_time=1.288e-04, forward_time=0.145, loss_ctc=63.342, loss_att=47.314, acc=0.705, loss=52.123, backward_time=1.027, grad_norm=120.140, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.341e-05, train_time=2.712 +[gpub002:0/64] 2023-07-13 18:56:02,679 (multiple_iter_factory:32) INFO: Building 10th iter-factory... +[gpub002:0/64] 2023-07-13 18:56:21,043 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-13 18:56:24,475 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-13 18:56:24,475 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6, +[gpub002:0/64] 2023-07-13 18:56:24,481 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-13 19:01:54,411 (trainer:732) INFO: 45epoch:train:8301-8400batch: iter_time=1.403, forward_time=0.145, loss_ctc=67.802, loss_att=47.923, acc=0.722, loss=53.886, backward_time=1.041, grad_norm=103.039, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.340e-05, train_time=8.036 +[gpub002:0/64] 2023-07-13 19:04:12,062 (trainer:732) INFO: 45epoch:train:8401-8500batch: iter_time=1.296e-04, forward_time=0.144, loss_ctc=77.578, loss_att=59.511, acc=0.713, loss=64.931, backward_time=1.028, grad_norm=113.538, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.340e-05, train_time=2.753 +[gpub002:0/64] 2023-07-13 19:06:36,114 (trainer:732) INFO: 45epoch:train:8501-8600batch: iter_time=1.099e-04, forward_time=0.144, loss_ctc=72.515, loss_att=51.973, acc=0.709, loss=58.136, backward_time=1.037, grad_norm=112.074, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.339e-05, train_time=2.881 +[gpub002:0/64] 2023-07-13 19:08:58,016 (trainer:732) INFO: 45epoch:train:8601-8700batch: iter_time=1.206e-04, forward_time=0.145, loss_ctc=70.363, loss_att=55.476, acc=0.722, loss=59.942, backward_time=1.052, grad_norm=139.344, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.338e-05, train_time=2.838 +[gpub002:0/64] 2023-07-13 19:11:23,653 (trainer:732) INFO: 45epoch:train:8701-8800batch: iter_time=1.251e-04, forward_time=0.145, loss_ctc=61.325, loss_att=45.766, acc=0.718, loss=50.433, backward_time=1.067, grad_norm=128.516, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.338e-05, train_time=2.913 +[gpub002:0/64] 2023-07-13 19:13:45,579 (trainer:732) INFO: 45epoch:train:8801-8900batch: iter_time=1.264e-04, forward_time=0.146, loss_ctc=75.654, loss_att=57.454, acc=0.712, loss=62.914, backward_time=1.033, grad_norm=163.219, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.337e-05, train_time=2.838 +[gpub002:0/64] 
2023-07-13 19:16:01,117 (trainer:732) INFO: 45epoch:train:8901-9000batch: iter_time=1.248e-04, forward_time=0.145, loss_ctc=71.877, loss_att=58.040, acc=0.706, loss=62.191, backward_time=1.026, grad_norm=150.120, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.337e-05, train_time=2.711 +[gpub002:0/64] 2023-07-13 19:18:23,985 (trainer:732) INFO: 45epoch:train:9001-9100batch: iter_time=1.213e-04, forward_time=0.145, loss_ctc=65.441, loss_att=49.344, acc=0.702, loss=54.173, backward_time=1.036, grad_norm=131.564, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.336e-05, train_time=2.857 +[gpub002:0/64] 2023-07-13 19:20:22,038 (multiple_iter_factory:32) INFO: Building 11th iter-factory... +[gpub002:0/64] 2023-07-13 19:20:40,563 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-13 19:20:44,273 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-13 19:20:44,273 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0, +[gpub002:0/64] 2023-07-13 19:20:44,279 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-13 19:26:50,491 (trainer:732) INFO: 45epoch:train:9101-9200batch: iter_time=1.709, forward_time=0.179, loss_ctc=67.413, loss_att=51.765, acc=0.712, loss=56.459, backward_time=1.050, grad_norm=122.252, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.184, optim0_lr0=5.335e-05, train_time=10.130 +[gpub002:0/64] 2023-07-13 19:29:07,652 (trainer:732) INFO: 45epoch:train:9201-9300batch: iter_time=1.127e-04, forward_time=0.146, loss_ctc=76.983, loss_att=59.756, acc=0.700, loss=64.924, backward_time=1.032, grad_norm=139.788, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.335e-05, train_time=2.742 +[gpub002:0/64] 2023-07-13 19:31:25,100 (trainer:732) INFO: 45epoch:train:9301-9400batch: iter_time=9.813e-05, forward_time=0.145, loss_ctc=72.296, loss_att=47.126, acc=0.734, loss=54.677, backward_time=1.028, grad_norm=113.311, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.334e-05, train_time=2.750 +[gpub002:0/64] 2023-07-13 19:33:41,318 (trainer:732) INFO: 45epoch:train:9401-9500batch: iter_time=9.632e-05, forward_time=0.145, loss_ctc=71.884, loss_att=54.495, acc=0.719, loss=59.712, backward_time=1.027, grad_norm=117.673, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.334e-05, train_time=2.724 +[gpub002:0/64] 2023-07-13 19:35:57,136 (trainer:732) INFO: 45epoch:train:9501-9600batch: iter_time=1.092e-04, forward_time=0.145, loss_ctc=64.752, loss_att=48.917, acc=0.719, loss=53.668, backward_time=1.026, grad_norm=117.335, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.333e-05, train_time=2.716 +[gpub002:0/64] 2023-07-13 19:38:13,202 (trainer:732) INFO: 45epoch:train:9601-9700batch: iter_time=1.130e-04, forward_time=0.145, loss_ctc=71.752, loss_att=55.035, acc=0.712, loss=60.050, backward_time=1.028, 
grad_norm=125.109, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.332e-05, train_time=2.721 +[gpub002:0/64] 2023-07-13 19:40:28,939 (trainer:732) INFO: 45epoch:train:9701-9800batch: iter_time=1.014e-04, forward_time=0.145, loss_ctc=75.944, loss_att=60.847, acc=0.702, loss=65.376, backward_time=1.027, grad_norm=149.069, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.332e-05, train_time=2.715 +[gpub002:0/64] 2023-07-13 19:42:44,392 (trainer:732) INFO: 45epoch:train:9801-9900batch: iter_time=1.022e-04, forward_time=0.143, loss_ctc=61.928, loss_att=45.638, acc=0.717, loss=50.525, backward_time=1.025, grad_norm=106.833, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.331e-05, train_time=2.709 +[gpub002:0/64] 2023-07-13 19:44:59,782 (trainer:732) INFO: 45epoch:train:9901-10000batch: iter_time=9.628e-05, forward_time=0.144, loss_ctc=68.171, loss_att=50.374, acc=0.709, loss=55.713, backward_time=1.024, grad_norm=116.675, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.330e-05, train_time=2.708 +[gpub002:0/64] 2023-07-13 19:59:12,558 (trainer:338) INFO: 45epoch results: [train] iter_time=0.241, forward_time=0.153, loss_ctc=71.101, loss_att=53.886, acc=0.712, loss=59.051, backward_time=1.034, grad_norm=125.414, clip=100.000, loss_scale=3.957e+32, optim_step_time=0.183, optim0_lr0=5.361e-05, train_time=3.486, time=4 hours, 50 minutes and 52.55 seconds, total_count=420000, gpu_max_cached_mem_GB=37.574, [valid] loss_ctc=42.917, cer_ctc=0.255, loss_att=35.722, acc=0.704, cer=0.334, wer=0.986, loss=37.881, time=7 minutes and 50.24 seconds, total_count=43010, gpu_max_cached_mem_GB=37.574, [att_plot] time=5 minutes and 59.73 seconds, total_count=0, gpu_max_cached_mem_GB=37.574 +[gpub002:0/64] 2023-07-13 19:59:28,695 (trainer:386) INFO: The best model has been updated: valid.total_count +[gpub002:0/64] 2023-07-13 19:59:28,788 (trainer:440) INFO: The model files were removed: exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/29epoch.pth, exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/40epoch.pth +[gpub002:0/64] 2023-07-13 19:59:28,788 (trainer:272) INFO: 46/50epoch started. Estimated time to finish: 1 day, 1 hour and 33 minutes +[gpub002:0/64] 2023-07-13 19:59:28,792 (multiple_iter_factory:32) INFO: Building 0th iter-factory... 
+[gpub002:0/64] 2023-07-13 19:59:46,914 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-13 19:59:50,345 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-13 19:59:50,345 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1, +[gpub002:0/64] 2023-07-13 19:59:50,352 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-13 20:04:40,186 (trainer:732) INFO: 46epoch:train:1-100batch: iter_time=1.647, forward_time=0.188, loss_ctc=65.883, loss_att=56.587, acc=0.712, loss=59.376, backward_time=1.048, grad_norm=121.603, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.330e-05, train_time=6.227 +[gpub002:0/64] 2023-07-13 20:06:57,011 (trainer:732) INFO: 46epoch:train:101-200batch: iter_time=1.304e-04, forward_time=0.146, loss_ctc=73.119, loss_att=52.389, acc=0.708, loss=58.608, backward_time=1.033, grad_norm=126.577, clip=100.000, loss_scale=3.375e+32, optim_step_time=0.182, optim0_lr0=5.329e-05, train_time=2.737 +[gpub002:0/64] 2023-07-13 20:09:13,490 (trainer:732) INFO: 46epoch:train:201-300batch: iter_time=1.219e-04, forward_time=0.144, loss_ctc=70.101, loss_att=49.203, acc=0.726, loss=55.472, backward_time=1.027, grad_norm=136.978, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.329e-05, train_time=2.729 +[gpub002:0/64] 2023-07-13 20:11:29,153 (trainer:732) INFO: 46epoch:train:301-400batch: iter_time=1.195e-04, forward_time=0.145, loss_ctc=67.230, loss_att=54.634, acc=0.706, loss=58.413, backward_time=1.026, grad_norm=117.807, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.328e-05, train_time=2.713 +[gpub002:0/64] 2023-07-13 20:13:47,110 (trainer:732) INFO: 46epoch:train:401-500batch: iter_time=1.302e-04, forward_time=0.144, loss_ctc=68.084, loss_att=51.893, acc=0.716, loss=56.751, backward_time=1.029, grad_norm=120.025, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.327e-05, train_time=2.759 +[gpub002:0/64] 2023-07-13 20:16:12,488 (trainer:732) INFO: 46epoch:train:501-600batch: iter_time=1.310e-04, forward_time=0.144, loss_ctc=81.257, loss_att=58.875, acc=0.709, loss=65.590, backward_time=1.037, grad_norm=140.480, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.327e-05, train_time=2.907 +[gpub002:0/64] 2023-07-13 20:18:37,169 (trainer:732) INFO: 46epoch:train:601-700batch: iter_time=1.257e-04, forward_time=0.145, loss_ctc=62.945, loss_att=44.662, acc=0.715, loss=50.147, backward_time=1.036, grad_norm=124.728, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.326e-05, train_time=2.893 +[gpub002:0/64] 2023-07-13 20:21:00,820 (trainer:732) INFO: 46epoch:train:701-800batch: iter_time=1.192e-04, forward_time=0.145, loss_ctc=73.224, loss_att=50.781, acc=0.730, loss=57.514, backward_time=1.034, grad_norm=141.813, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.326e-05, 
train_time=2.873 +[gpub002:0/64] 2023-07-13 20:22:00,295 (multiple_iter_factory:32) INFO: Building 1th iter-factory... +[gpub002:0/64] 2023-07-13 20:22:18,119 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-13 20:22:21,538 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-13 20:22:21,538 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7, +[gpub002:0/64] 2023-07-13 20:22:21,544 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-13 20:28:54,861 (trainer:732) INFO: 46epoch:train:801-900batch: iter_time=3.233, forward_time=0.196, loss_ctc=66.161, loss_att=51.935, acc=0.720, loss=56.203, backward_time=1.045, grad_norm=126.426, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.190, optim0_lr0=5.325e-05, train_time=9.480 +[gpub002:0/64] 2023-07-13 20:31:12,985 (trainer:732) INFO: 46epoch:train:901-1000batch: iter_time=0.001, forward_time=0.153, loss_ctc=74.152, loss_att=55.993, acc=0.706, loss=61.441, backward_time=1.032, grad_norm=120.946, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.324e-05, train_time=2.763 +[gpub002:0/64] 2023-07-13 20:33:29,468 (trainer:732) INFO: 46epoch:train:1001-1100batch: iter_time=0.001, forward_time=0.146, loss_ctc=65.898, loss_att=48.417, acc=0.733, loss=53.661, backward_time=1.032, grad_norm=117.557, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.324e-05, train_time=2.729 +[gpub002:0/64] 2023-07-13 20:36:03,141 (trainer:732) INFO: 46epoch:train:1101-1200batch: iter_time=2.935e-04, forward_time=0.273, loss_ctc=68.912, loss_att=53.279, acc=0.714, loss=57.969, backward_time=1.053, grad_norm=151.138, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.189, optim0_lr0=5.323e-05, train_time=3.072 +[gpub002:0/64] 2023-07-13 20:38:19,536 (trainer:732) INFO: 46epoch:train:1201-1300batch: iter_time=1.362e-04, forward_time=0.146, loss_ctc=70.800, loss_att=51.181, acc=0.720, loss=57.067, backward_time=1.029, grad_norm=152.229, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.323e-05, train_time=2.729 +[gpub002:0/64] 2023-07-13 20:40:36,527 (trainer:732) INFO: 46epoch:train:1301-1400batch: iter_time=1.269e-04, forward_time=0.150, loss_ctc=73.810, loss_att=54.374, acc=0.723, loss=60.205, backward_time=1.030, grad_norm=167.798, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.322e-05, train_time=2.740 +[gpub002:0/64] 2023-07-13 20:42:52,466 (trainer:732) INFO: 46epoch:train:1401-1500batch: iter_time=1.267e-04, forward_time=0.146, loss_ctc=68.079, loss_att=49.216, acc=0.708, loss=54.875, backward_time=1.026, grad_norm=209.029, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.183, optim0_lr0=5.321e-05, train_time=2.719 +[gpub002:0/64] 2023-07-13 20:45:08,125 (trainer:732) INFO: 46epoch:train:1501-1600batch: iter_time=1.277e-04, forward_time=0.144, loss_ctc=69.590, loss_att=48.587, acc=0.725, loss=54.888, 
backward_time=1.026, grad_norm=143.890, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.181, optim0_lr0=5.321e-05, train_time=2.713 +[gpub002:0/64] 2023-07-13 20:46:58,016 (multiple_iter_factory:32) INFO: Building 2th iter-factory... +[gpub002:0/64] 2023-07-13 20:47:16,447 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-13 20:47:19,843 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-13 20:47:19,843 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6, +[gpub002:0/64] 2023-07-13 20:47:19,979 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-13 20:53:32,464 (trainer:732) INFO: 46epoch:train:1601-1700batch: iter_time=3.523, forward_time=0.178, loss_ctc=68.290, loss_att=51.711, acc=0.723, loss=56.685, backward_time=1.040, grad_norm=114.130, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.184, optim0_lr0=5.320e-05, train_time=10.086 +[gpub002:0/64] 2023-07-13 20:55:49,050 (trainer:732) INFO: 46epoch:train:1701-1800batch: iter_time=1.217e-04, forward_time=0.145, loss_ctc=69.254, loss_att=57.546, acc=0.707, loss=61.058, backward_time=1.030, grad_norm=147.850, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.320e-05, train_time=2.732 +[gpub002:0/64] 2023-07-13 20:58:04,545 (trainer:732) INFO: 46epoch:train:1801-1900batch: iter_time=1.289e-04, forward_time=0.144, loss_ctc=67.059, loss_att=46.493, acc=0.719, loss=52.663, backward_time=1.026, grad_norm=125.550, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.319e-05, train_time=2.710 +[gpub002:0/64] 2023-07-13 21:00:20,791 (trainer:732) INFO: 46epoch:train:1901-2000batch: iter_time=1.198e-04, forward_time=0.145, loss_ctc=67.667, loss_att=50.905, acc=0.727, loss=55.934, backward_time=1.029, grad_norm=127.276, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.318e-05, train_time=2.725 +[gpub002:0/64] 2023-07-13 21:02:36,612 (trainer:732) INFO: 46epoch:train:2001-2100batch: iter_time=1.225e-04, forward_time=0.146, loss_ctc=67.126, loss_att=48.506, acc=0.720, loss=54.092, backward_time=1.028, grad_norm=118.373, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.318e-05, train_time=2.716 +[gpub002:0/64] 2023-07-13 21:04:53,111 (trainer:732) INFO: 46epoch:train:2101-2200batch: iter_time=1.325e-04, forward_time=0.148, loss_ctc=72.830, loss_att=55.843, acc=0.709, loss=60.939, backward_time=1.030, grad_norm=138.565, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.183, optim0_lr0=5.317e-05, train_time=2.729 +[gpub002:0/64] 2023-07-13 21:07:08,871 (trainer:732) INFO: 46epoch:train:2201-2300batch: iter_time=1.351e-04, forward_time=0.145, loss_ctc=69.103, loss_att=52.736, acc=0.704, loss=57.646, backward_time=1.028, grad_norm=122.386, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.183, optim0_lr0=5.317e-05, train_time=2.716 +[gpub002:0/64] 2023-07-13 21:09:24,380 (trainer:732) INFO: 
46epoch:train:2301-2400batch: iter_time=1.543e-04, forward_time=0.145, loss_ctc=63.429, loss_att=44.374, acc=0.725, loss=50.090, backward_time=1.027, grad_norm=140.538, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.183, optim0_lr0=5.316e-05, train_time=2.710 +[gpub002:0/64] 2023-07-13 21:11:48,859 (multiple_iter_factory:32) INFO: Building 3th iter-factory... +[gpub002:0/64] 2023-07-13 21:12:06,991 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-13 21:12:10,395 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-13 21:12:10,395 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8, +[gpub002:0/64] 2023-07-13 21:12:10,401 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-13 21:17:51,096 (trainer:732) INFO: 46epoch:train:2401-2500batch: iter_time=1.340, forward_time=0.145, loss_ctc=74.058, loss_att=50.477, acc=0.726, loss=57.551, backward_time=1.066, grad_norm=158.986, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.183, optim0_lr0=5.315e-05, train_time=10.134 +[gpub002:0/64] 2023-07-13 21:20:12,148 (trainer:732) INFO: 46epoch:train:2501-2600batch: iter_time=1.226e-04, forward_time=0.145, loss_ctc=73.951, loss_att=59.611, acc=0.702, loss=63.913, backward_time=1.038, grad_norm=135.632, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.315e-05, train_time=2.821 +[gpub002:0/64] 2023-07-13 21:22:28,950 (trainer:732) INFO: 46epoch:train:2601-2700batch: iter_time=1.170e-04, forward_time=0.143, loss_ctc=65.472, loss_att=47.308, acc=0.721, loss=52.757, backward_time=1.030, grad_norm=119.313, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.314e-05, train_time=2.736 +[gpub002:0/64] 2023-07-13 21:24:44,625 (trainer:732) INFO: 46epoch:train:2701-2800batch: iter_time=1.242e-04, forward_time=0.145, loss_ctc=70.360, loss_att=52.584, acc=0.716, loss=57.916, backward_time=1.028, grad_norm=125.573, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.314e-05, train_time=2.713 +[gpub002:0/64] 2023-07-13 21:27:00,394 (trainer:732) INFO: 46epoch:train:2801-2900batch: iter_time=1.253e-04, forward_time=0.146, loss_ctc=66.237, loss_att=48.990, acc=0.715, loss=54.164, backward_time=1.028, grad_norm=119.418, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.183, optim0_lr0=5.313e-05, train_time=2.715 +[gpub002:0/64] 2023-07-13 21:29:16,078 (trainer:732) INFO: 46epoch:train:2901-3000batch: iter_time=1.298e-04, forward_time=0.146, loss_ctc=69.138, loss_att=52.011, acc=0.715, loss=57.149, backward_time=1.028, grad_norm=140.082, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.312e-05, train_time=2.713 +[gpub002:0/64] 2023-07-13 21:31:31,702 (trainer:732) INFO: 46epoch:train:3001-3100batch: iter_time=1.423e-04, forward_time=0.145, loss_ctc=69.682, loss_att=52.735, acc=0.704, loss=57.819, backward_time=1.028, grad_norm=112.062, clip=100.000, 
loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.312e-05, train_time=2.712 +[gpub002:0/64] 2023-07-13 21:33:47,412 (trainer:732) INFO: 46epoch:train:3101-3200batch: iter_time=1.092e-04, forward_time=0.146, loss_ctc=67.778, loss_att=47.656, acc=0.722, loss=53.693, backward_time=1.027, grad_norm=128.979, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.311e-05, train_time=2.714 +[gpub002:0/64] 2023-07-13 21:36:02,959 (trainer:663) WARNING: The grad norm is nan. Skipping updating the model. +[gpub002:0/64] 2023-07-13 21:36:02,969 (trainer:732) INFO: 46epoch:train:3201-3300batch: iter_time=1.148e-04, forward_time=0.146, loss_ctc=74.814, loss_att=52.797, acc=0.718, loss=59.402, backward_time=1.028, grad_norm=125.040, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.311e-05, train_time=2.711 +[gpub002:0/64] 2023-07-13 21:36:49,099 (multiple_iter_factory:32) INFO: Building 4th iter-factory... +[gpub002:0/64] 2023-07-13 21:37:07,122 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-13 21:37:10,547 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-13 21:37:10,547 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3, +[gpub002:0/64] 2023-07-13 21:37:10,663 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-13 21:42:10,676 (trainer:732) INFO: 46epoch:train:3301-3400batch: iter_time=1.578, forward_time=0.207, loss_ctc=67.011, loss_att=54.152, acc=0.720, loss=58.010, backward_time=1.045, grad_norm=167.072, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.184, optim0_lr0=5.310e-05, train_time=7.354 +[gpub002:0/64] 2023-07-13 21:44:26,985 (trainer:732) INFO: 46epoch:train:3401-3500batch: iter_time=1.279e-04, forward_time=0.146, loss_ctc=73.361, loss_att=54.970, acc=0.712, loss=60.487, backward_time=1.029, grad_norm=126.355, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.309e-05, train_time=2.726 +[gpub002:0/64] 2023-07-13 21:46:42,557 (trainer:732) INFO: 46epoch:train:3501-3600batch: iter_time=1.298e-04, forward_time=0.145, loss_ctc=64.685, loss_att=47.711, acc=0.736, loss=52.804, backward_time=1.026, grad_norm=107.177, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.309e-05, train_time=2.711 +[gpub002:0/64] 2023-07-13 21:48:58,259 (trainer:732) INFO: 46epoch:train:3601-3700batch: iter_time=1.307e-04, forward_time=0.146, loss_ctc=68.193, loss_att=53.136, acc=0.717, loss=57.653, backward_time=1.026, grad_norm=121.607, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.308e-05, train_time=2.714 +[gpub002:0/64] 2023-07-13 21:51:15,506 (trainer:732) INFO: 46epoch:train:3701-3800batch: iter_time=1.059e-04, forward_time=0.147, loss_ctc=69.434, loss_att=50.735, acc=0.724, loss=56.345, backward_time=1.030, grad_norm=108.644, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.308e-05, train_time=2.744 
+[gpub002:0/64] 2023-07-13 21:53:38,632 (trainer:732) INFO: 46epoch:train:3801-3900batch: iter_time=1.193e-04, forward_time=0.157, loss_ctc=71.575, loss_att=53.589, acc=0.723, loss=58.985, backward_time=1.046, grad_norm=142.656, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.307e-05, train_time=2.863 +[gpub002:0/64] 2023-07-13 21:55:57,708 (trainer:732) INFO: 46epoch:train:3901-4000batch: iter_time=1.164e-04, forward_time=0.146, loss_ctc=67.232, loss_att=49.694, acc=0.711, loss=54.956, backward_time=1.033, grad_norm=118.653, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.184, optim0_lr0=5.306e-05, train_time=2.781 +[gpub002:0/64] 2023-07-13 21:58:13,850 (trainer:732) INFO: 46epoch:train:4001-4100batch: iter_time=1.079e-04, forward_time=0.145, loss_ctc=69.507, loss_att=48.626, acc=0.726, loss=54.891, backward_time=1.030, grad_norm=115.628, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.306e-05, train_time=2.723 +[gpub002:0/64] 2023-07-13 22:00:07,007 (multiple_iter_factory:32) INFO: Building 5th iter-factory... +[gpub002:0/64] 2023-07-13 22:00:25,302 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-13 22:00:28,740 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-13 22:00:28,740 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2, +[gpub002:0/64] 2023-07-13 22:00:28,746 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-13 22:06:11,914 (trainer:732) INFO: 46epoch:train:4101-4200batch: iter_time=3.280, forward_time=0.182, loss_ctc=67.564, loss_att=50.602, acc=0.727, loss=55.691, backward_time=1.048, grad_norm=124.825, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.184, optim0_lr0=5.305e-05, train_time=9.561 +[gpub002:0/64] 2023-07-13 22:08:28,453 (trainer:732) INFO: 46epoch:train:4201-4300batch: iter_time=1.204e-04, forward_time=0.144, loss_ctc=69.532, loss_att=59.594, acc=0.704, loss=62.576, backward_time=1.031, grad_norm=120.778, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.305e-05, train_time=2.731 +[gpub002:0/64] 2023-07-13 22:10:44,255 (trainer:732) INFO: 46epoch:train:4301-4400batch: iter_time=1.145e-04, forward_time=0.144, loss_ctc=67.758, loss_att=47.109, acc=0.721, loss=53.304, backward_time=1.027, grad_norm=107.527, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.304e-05, train_time=2.716 +[gpub002:0/64] 2023-07-13 22:12:59,720 (trainer:732) INFO: 46epoch:train:4401-4500batch: iter_time=1.420e-04, forward_time=0.144, loss_ctc=68.105, loss_att=51.162, acc=0.727, loss=56.245, backward_time=1.025, grad_norm=134.544, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.303e-05, train_time=2.709 +[gpub002:0/64] 2023-07-13 22:15:15,435 (trainer:732) INFO: 46epoch:train:4501-4600batch: iter_time=1.536e-04, forward_time=0.146, loss_ctc=67.447, loss_att=49.089, acc=0.720, loss=54.596, 
backward_time=1.028, grad_norm=123.003, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.303e-05, train_time=2.714 +[gpub002:0/64] 2023-07-13 22:17:31,236 (trainer:732) INFO: 46epoch:train:4601-4700batch: iter_time=1.373e-04, forward_time=0.146, loss_ctc=72.378, loss_att=55.337, acc=0.710, loss=60.450, backward_time=1.028, grad_norm=127.410, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.302e-05, train_time=2.716 +[gpub002:0/64] 2023-07-13 22:19:46,893 (trainer:732) INFO: 46epoch:train:4701-4800batch: iter_time=1.286e-04, forward_time=0.146, loss_ctc=68.304, loss_att=52.814, acc=0.705, loss=57.461, backward_time=1.027, grad_norm=115.040, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.302e-05, train_time=2.713 +[gpub002:0/64] 2023-07-13 22:22:02,247 (trainer:732) INFO: 46epoch:train:4801-4900batch: iter_time=1.396e-04, forward_time=0.145, loss_ctc=62.431, loss_att=43.727, acc=0.729, loss=49.338, backward_time=1.025, grad_norm=120.051, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.301e-05, train_time=2.707 +[gpub002:0/64] 2023-07-13 22:24:18,146 (multiple_iter_factory:32) INFO: Building 6th iter-factory... +[gpub002:0/64] 2023-07-13 22:24:36,789 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-13 22:24:40,257 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-13 22:24:40,257 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0, +[gpub002:0/64] 2023-07-13 22:24:40,263 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-13 22:31:08,903 (trainer:732) INFO: 46epoch:train:4901-5000batch: iter_time=1.286, forward_time=0.146, loss_ctc=71.786, loss_att=49.940, acc=0.728, loss=56.494, backward_time=1.039, grad_norm=110.395, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.300e-05, train_time=10.932 +[gpub002:0/64] 2023-07-13 22:33:27,419 (trainer:732) INFO: 46epoch:train:5001-5100batch: iter_time=1.223e-04, forward_time=0.149, loss_ctc=64.751, loss_att=53.246, acc=0.721, loss=56.697, backward_time=1.037, grad_norm=99.967, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.300e-05, train_time=2.771 +[gpub002:0/64] 2023-07-13 22:35:43,951 (trainer:732) INFO: 46epoch:train:5101-5200batch: iter_time=1.150e-04, forward_time=0.144, loss_ctc=72.158, loss_att=51.378, acc=0.713, loss=57.612, backward_time=1.028, grad_norm=111.287, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.299e-05, train_time=2.730 +[gpub002:0/64] 2023-07-13 22:37:59,756 (trainer:732) INFO: 46epoch:train:5201-5300batch: iter_time=1.149e-04, forward_time=0.145, loss_ctc=67.483, loss_att=47.864, acc=0.735, loss=53.750, backward_time=1.028, grad_norm=125.499, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.299e-05, train_time=2.716 +[gpub002:0/64] 2023-07-13 22:40:23,841 (trainer:732) INFO: 
46epoch:train:5301-5400batch: iter_time=1.153e-04, forward_time=0.145, loss_ctc=65.944, loss_att=51.781, acc=0.714, loss=56.030, backward_time=1.042, grad_norm=118.487, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.298e-05, train_time=2.881 +[gpub002:0/64] 2023-07-13 22:42:39,494 (trainer:732) INFO: 46epoch:train:5401-5500batch: iter_time=1.323e-04, forward_time=0.145, loss_ctc=68.524, loss_att=52.761, acc=0.713, loss=57.490, backward_time=1.028, grad_norm=118.967, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.297e-05, train_time=2.713 +[gpub002:0/64] 2023-07-13 22:44:55,465 (trainer:732) INFO: 46epoch:train:5501-5600batch: iter_time=1.347e-04, forward_time=0.146, loss_ctc=75.505, loss_att=56.510, acc=0.706, loss=62.208, backward_time=1.031, grad_norm=124.408, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.297e-05, train_time=2.719 +[gpub002:0/64] 2023-07-13 22:47:10,920 (trainer:732) INFO: 46epoch:train:5601-5700batch: iter_time=1.452e-04, forward_time=0.145, loss_ctc=62.459, loss_att=43.488, acc=0.724, loss=49.180, backward_time=1.026, grad_norm=107.132, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.296e-05, train_time=2.709 +[gpub002:0/64] 2023-07-13 22:49:26,868 (trainer:732) INFO: 46epoch:train:5701-5800batch: iter_time=1.384e-04, forward_time=0.146, loss_ctc=72.636, loss_att=50.457, acc=0.737, loss=57.111, backward_time=1.030, grad_norm=130.378, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.296e-05, train_time=2.719 +[gpub002:0/64] 2023-07-13 22:50:15,326 (multiple_iter_factory:32) INFO: Building 7th iter-factory... +[gpub002:0/64] 2023-07-13 22:50:33,588 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-13 22:50:37,036 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-13 22:50:37,036 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10, +[gpub002:0/64] 2023-07-13 22:50:37,042 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-13 22:58:22,565 (trainer:732) INFO: 46epoch:train:5801-5900batch: iter_time=1.406, forward_time=0.196, loss_ctc=70.088, loss_att=56.947, acc=0.697, loss=60.889, backward_time=1.041, grad_norm=108.830, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.185, optim0_lr0=5.295e-05, train_time=10.713 +[gpub002:0/64] 2023-07-13 23:00:39,434 (trainer:732) INFO: 46epoch:train:5901-6000batch: iter_time=1.184e-04, forward_time=0.146, loss_ctc=69.077, loss_att=49.338, acc=0.718, loss=55.259, backward_time=1.029, grad_norm=127.466, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.295e-05, train_time=2.738 +[gpub002:0/64] 2023-07-13 23:02:54,972 (trainer:732) INFO: 46epoch:train:6001-6100batch: iter_time=1.176e-04, forward_time=0.145, loss_ctc=66.432, loss_att=45.825, acc=0.735, loss=52.007, backward_time=1.025, grad_norm=111.877, clip=100.000, 
loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.294e-05, train_time=2.711 +[gpub002:0/64] 2023-07-13 23:05:10,758 (trainer:732) INFO: 46epoch:train:6101-6200batch: iter_time=1.257e-04, forward_time=0.146, loss_ctc=68.644, loss_att=53.212, acc=0.712, loss=57.841, backward_time=1.028, grad_norm=126.634, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.293e-05, train_time=2.716 +[gpub002:0/64] 2023-07-13 23:07:27,287 (trainer:732) INFO: 46epoch:train:6201-6300batch: iter_time=1.196e-04, forward_time=0.145, loss_ctc=68.525, loss_att=54.107, acc=0.705, loss=58.432, backward_time=1.032, grad_norm=130.338, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.293e-05, train_time=2.730 +[gpub002:0/64] 2023-07-13 23:09:42,933 (trainer:732) INFO: 46epoch:train:6301-6400batch: iter_time=1.285e-04, forward_time=0.145, loss_ctc=73.030, loss_att=55.687, acc=0.706, loss=60.890, backward_time=1.028, grad_norm=122.293, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.292e-05, train_time=2.713 +[gpub002:0/64] 2023-07-13 23:11:58,473 (trainer:732) INFO: 46epoch:train:6401-6500batch: iter_time=1.294e-04, forward_time=0.145, loss_ctc=62.489, loss_att=42.039, acc=0.728, loss=48.174, backward_time=1.027, grad_norm=110.857, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.292e-05, train_time=2.711 +[gpub002:0/64] 2023-07-13 23:14:18,395 (trainer:732) INFO: 46epoch:train:6501-6600batch: iter_time=1.265e-04, forward_time=0.145, loss_ctc=70.293, loss_att=50.054, acc=0.730, loss=56.126, backward_time=1.033, grad_norm=116.507, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.291e-05, train_time=2.798 +[gpub002:0/64] 2023-07-13 23:16:08,051 (multiple_iter_factory:32) INFO: Building 8th iter-factory... 
+[gpub002:0/64] 2023-07-13 23:16:26,216 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-13 23:16:29,635 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-13 23:16:29,635 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11, +[gpub002:0/64] 2023-07-13 23:16:29,682 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-13 23:21:49,471 (trainer:732) INFO: 46epoch:train:6601-6700batch: iter_time=3.060, forward_time=0.146, loss_ctc=72.928, loss_att=58.616, acc=0.707, loss=62.910, backward_time=1.044, grad_norm=131.228, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.290e-05, train_time=9.021 +[gpub002:0/64] 2023-07-13 23:24:06,689 (trainer:732) INFO: 46epoch:train:6701-6800batch: iter_time=1.253e-04, forward_time=0.146, loss_ctc=72.161, loss_att=56.066, acc=0.711, loss=60.894, backward_time=1.031, grad_norm=141.373, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.290e-05, train_time=2.744 +[gpub002:0/64] 2023-07-13 23:26:23,450 (trainer:732) INFO: 46epoch:train:6801-6900batch: iter_time=1.254e-04, forward_time=0.147, loss_ctc=66.501, loss_att=47.899, acc=0.733, loss=53.480, backward_time=1.029, grad_norm=134.963, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.289e-05, train_time=2.735 +[gpub002:0/64] 2023-07-13 23:28:50,171 (trainer:732) INFO: 46epoch:train:6901-7000batch: iter_time=1.255e-04, forward_time=0.145, loss_ctc=67.438, loss_att=53.891, acc=0.724, loss=57.955, backward_time=1.034, grad_norm=109.976, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.289e-05, train_time=2.934 +[gpub002:0/64] 2023-07-13 23:31:05,996 (trainer:732) INFO: 46epoch:train:7001-7100batch: iter_time=1.261e-04, forward_time=0.145, loss_ctc=66.567, loss_att=49.018, acc=0.721, loss=54.283, backward_time=1.028, grad_norm=118.826, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.288e-05, train_time=2.716 +[gpub002:0/64] 2023-07-13 23:33:21,934 (trainer:732) INFO: 46epoch:train:7101-7200batch: iter_time=1.132e-04, forward_time=0.144, loss_ctc=73.747, loss_att=54.406, acc=0.722, loss=60.208, backward_time=1.029, grad_norm=143.217, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.287e-05, train_time=2.719 +[gpub002:0/64] 2023-07-13 23:35:37,952 (trainer:732) INFO: 46epoch:train:7201-7300batch: iter_time=1.097e-04, forward_time=0.146, loss_ctc=64.890, loss_att=47.420, acc=0.716, loss=52.661, backward_time=1.031, grad_norm=122.847, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.287e-05, train_time=2.720 +[gpub002:0/64] 2023-07-13 23:37:54,841 (trainer:732) INFO: 46epoch:train:7301-7400batch: iter_time=1.090e-04, forward_time=0.145, loss_ctc=69.972, loss_att=48.798, acc=0.732, loss=55.150, backward_time=1.031, grad_norm=107.927, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, 
optim0_lr0=5.286e-05, train_time=2.738 +[gpub002:0/64] 2023-07-13 23:40:11,039 (trainer:732) INFO: 46epoch:train:7401-7500batch: iter_time=1.048e-04, forward_time=0.146, loss_ctc=72.249, loss_att=55.096, acc=0.722, loss=60.242, backward_time=1.030, grad_norm=116.626, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.286e-05, train_time=2.724 +[gpub002:0/64] 2023-07-13 23:40:12,748 (multiple_iter_factory:32) INFO: Building 9th iter-factory... +[gpub002:0/64] 2023-07-13 23:40:30,664 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-13 23:40:34,096 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-13 23:40:34,096 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5, +[gpub002:0/64] 2023-07-13 23:40:34,102 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-13 23:46:45,686 (trainer:663) WARNING: The grad norm is nan. Skipping updating the model. +[gpub002:0/64] 2023-07-13 23:48:38,986 (trainer:732) INFO: 46epoch:train:7501-7600batch: iter_time=1.321, forward_time=0.191, loss_ctc=65.367, loss_att=55.036, acc=0.722, loss=58.135, backward_time=1.048, grad_norm=119.689, clip=100.000, loss_scale=3.775e+32, optim_step_time=0.186, optim0_lr0=5.285e-05, train_time=10.159 +[gpub002:0/64] 2023-07-13 23:50:55,149 (trainer:732) INFO: 46epoch:train:7601-7700batch: iter_time=1.241e-04, forward_time=0.143, loss_ctc=72.022, loss_att=51.304, acc=0.717, loss=57.519, backward_time=1.028, grad_norm=133.215, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.284e-05, train_time=2.723 +[gpub002:0/64] 2023-07-13 23:53:11,294 (trainer:732) INFO: 46epoch:train:7701-7800batch: iter_time=1.063e-04, forward_time=0.144, loss_ctc=67.643, loss_att=47.760, acc=0.736, loss=53.725, backward_time=1.028, grad_norm=130.713, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.284e-05, train_time=2.723 +[gpub002:0/64] 2023-07-13 23:55:28,212 (trainer:732) INFO: 46epoch:train:7801-7900batch: iter_time=1.197e-04, forward_time=0.145, loss_ctc=64.828, loss_att=52.504, acc=0.715, loss=56.201, backward_time=1.026, grad_norm=142.216, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.283e-05, train_time=2.738 +[gpub002:0/64] 2023-07-13 23:57:43,849 (trainer:732) INFO: 46epoch:train:7901-8000batch: iter_time=1.242e-04, forward_time=0.145, loss_ctc=67.229, loss_att=51.224, acc=0.726, loss=56.026, backward_time=1.027, grad_norm=115.103, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.283e-05, train_time=2.713 +[gpub002:0/64] 2023-07-13 23:59:59,805 (trainer:732) INFO: 46epoch:train:8001-8100batch: iter_time=1.158e-04, forward_time=0.145, loss_ctc=73.639, loss_att=56.195, acc=0.717, loss=61.428, backward_time=1.028, grad_norm=125.297, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.282e-05, train_time=2.719 +[gpub002:0/64] 2023-07-14 00:02:20,477 
(trainer:732) INFO: 46epoch:train:8101-8200batch: iter_time=1.146e-04, forward_time=0.166, loss_ctc=62.564, loss_att=44.204, acc=0.720, loss=49.712, backward_time=1.037, grad_norm=108.944, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.282e-05, train_time=2.813 +[gpub002:0/64] 2023-07-14 00:04:43,201 (trainer:732) INFO: 46epoch:train:8201-8300batch: iter_time=1.046e-04, forward_time=0.150, loss_ctc=71.384, loss_att=50.456, acc=0.737, loss=56.734, backward_time=1.035, grad_norm=124.346, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.281e-05, train_time=2.854 +[gpub002:0/64] 2023-07-14 00:05:53,094 (multiple_iter_factory:32) INFO: Building 10th iter-factory... +[gpub002:0/64] 2023-07-14 00:06:11,216 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-14 00:06:14,920 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-14 00:06:14,920 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9, +[gpub002:0/64] 2023-07-14 00:06:14,927 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-14 00:13:47,812 (trainer:732) INFO: 46epoch:train:8301-8400batch: iter_time=3.876, forward_time=0.196, loss_ctc=69.752, loss_att=55.635, acc=0.712, loss=59.870, backward_time=1.068, grad_norm=118.005, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.186, optim0_lr0=5.280e-05, train_time=10.891 +[gpub002:0/64] 2023-07-14 00:16:05,245 (trainer:732) INFO: 46epoch:train:8401-8500batch: iter_time=1.318e-04, forward_time=0.145, loss_ctc=67.335, loss_att=48.827, acc=0.725, loss=54.379, backward_time=1.027, grad_norm=107.394, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.280e-05, train_time=2.749 +[gpub002:0/64] 2023-07-14 00:18:21,297 (trainer:732) INFO: 46epoch:train:8501-8600batch: iter_time=1.145e-04, forward_time=0.145, loss_ctc=67.132, loss_att=46.641, acc=0.733, loss=52.788, backward_time=1.028, grad_norm=107.093, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.279e-05, train_time=2.721 +[gpub002:0/64] 2023-07-14 00:20:42,957 (trainer:732) INFO: 46epoch:train:8601-8700batch: iter_time=1.030e-04, forward_time=0.145, loss_ctc=66.963, loss_att=52.861, acc=0.720, loss=57.092, backward_time=1.048, grad_norm=130.169, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.279e-05, train_time=2.833 +[gpub002:0/64] 2023-07-14 00:23:00,099 (trainer:732) INFO: 46epoch:train:8701-8800batch: iter_time=1.097e-04, forward_time=0.146, loss_ctc=70.472, loss_att=52.762, acc=0.723, loss=58.075, backward_time=1.032, grad_norm=139.851, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.278e-05, train_time=2.743 +[gpub002:0/64] 2023-07-14 00:25:20,177 (trainer:732) INFO: 46epoch:train:8801-8900batch: iter_time=1.112e-04, forward_time=0.146, loss_ctc=71.545, loss_att=54.284, acc=0.718, loss=59.463, backward_time=1.034, grad_norm=122.838, 
clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.277e-05, train_time=2.801 +[gpub002:0/64] 2023-07-14 00:27:37,496 (trainer:732) INFO: 46epoch:train:8901-9000batch: iter_time=1.124e-04, forward_time=0.147, loss_ctc=62.966, loss_att=43.277, acc=0.727, loss=49.184, backward_time=1.032, grad_norm=110.704, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.277e-05, train_time=2.746 +[gpub002:0/64] 2023-07-14 00:29:53,320 (trainer:732) INFO: 46epoch:train:9001-9100batch: iter_time=1.331e-04, forward_time=0.145, loss_ctc=71.349, loss_att=50.693, acc=0.732, loss=56.890, backward_time=1.028, grad_norm=112.882, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.276e-05, train_time=2.716 +[gpub002:0/64] 2023-07-14 00:31:47,729 (multiple_iter_factory:32) INFO: Building 11th iter-factory... +[gpub002:0/64] 2023-07-14 00:32:06,387 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-14 00:32:09,845 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-14 00:32:09,845 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4, +[gpub002:0/64] 2023-07-14 00:32:09,852 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-14 00:38:23,995 (trainer:732) INFO: 46epoch:train:9101-9200batch: iter_time=3.641, forward_time=0.187, loss_ctc=72.415, loss_att=55.674, acc=0.717, loss=60.696, backward_time=1.057, grad_norm=127.477, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.185, optim0_lr0=5.276e-05, train_time=10.213 +[gpub002:0/64] 2023-07-14 00:40:40,945 (trainer:732) INFO: 46epoch:train:9201-9300batch: iter_time=1.248e-04, forward_time=0.145, loss_ctc=67.604, loss_att=57.124, acc=0.707, loss=60.268, backward_time=1.028, grad_norm=127.038, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.275e-05, train_time=2.739 +[gpub002:0/64] 2023-07-14 00:42:57,963 (trainer:732) INFO: 46epoch:train:9301-9400batch: iter_time=1.030e-04, forward_time=0.143, loss_ctc=66.251, loss_att=46.442, acc=0.724, loss=52.385, backward_time=1.027, grad_norm=112.768, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.274e-05, train_time=2.740 +[gpub002:0/64] 2023-07-14 00:45:13,954 (trainer:732) INFO: 46epoch:train:9401-9500batch: iter_time=1.018e-04, forward_time=0.144, loss_ctc=67.687, loss_att=51.450, acc=0.727, loss=56.321, backward_time=1.026, grad_norm=130.442, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.274e-05, train_time=2.718 +[gpub002:0/64] 2023-07-14 00:47:30,437 (trainer:732) INFO: 46epoch:train:9501-9600batch: iter_time=1.065e-04, forward_time=0.147, loss_ctc=66.976, loss_att=48.132, acc=0.723, loss=53.785, backward_time=1.028, grad_norm=111.136, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.273e-05, train_time=2.731 +[gpub002:0/64] 2023-07-14 00:49:46,162 (trainer:732) INFO: 46epoch:train:9601-9700batch: 
iter_time=1.500e-04, forward_time=0.146, loss_ctc=73.129, loss_att=56.047, acc=0.713, loss=61.172, backward_time=1.028, grad_norm=121.197, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.273e-05, train_time=2.714 +[gpub002:0/64] 2023-07-14 00:52:01,715 (trainer:732) INFO: 46epoch:train:9701-9800batch: iter_time=1.470e-04, forward_time=0.146, loss_ctc=68.415, loss_att=54.013, acc=0.702, loss=58.333, backward_time=1.027, grad_norm=120.706, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.272e-05, train_time=2.711 +[gpub002:0/64] 2023-07-14 00:54:17,220 (trainer:732) INFO: 46epoch:train:9801-9900batch: iter_time=1.497e-04, forward_time=0.146, loss_ctc=63.092, loss_att=45.005, acc=0.726, loss=50.431, backward_time=1.027, grad_norm=115.827, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.272e-05, train_time=2.710 +[gpub002:0/64] 2023-07-14 00:56:33,163 (trainer:732) INFO: 46epoch:train:9901-10000batch: iter_time=1.520e-04, forward_time=0.147, loss_ctc=71.355, loss_att=49.992, acc=0.729, loss=56.401, backward_time=1.029, grad_norm=110.394, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.271e-05, train_time=2.719 +[gpub002:0/64] 2023-07-14 01:10:58,653 (trainer:338) INFO: 46epoch results: [train] iter_time=0.292, forward_time=0.151, loss_ctc=68.954, loss_att=51.397, acc=0.719, loss=56.664, backward_time=1.032, grad_norm=125.536, clip=100.000, loss_scale=4.322e+32, optim_step_time=0.183, optim0_lr0=5.300e-05, train_time=3.565, time=4 hours, 57 minutes and 19.06 seconds, total_count=430000, gpu_max_cached_mem_GB=37.574, [valid] loss_ctc=43.169, cer_ctc=0.254, loss_att=38.108, acc=0.681, cer=0.392, wer=0.992, loss=39.627, time=8 minutes and 21.07 seconds, total_count=44022, gpu_max_cached_mem_GB=37.574, [att_plot] time=5 minutes and 49.73 seconds, total_count=0, gpu_max_cached_mem_GB=37.574 +[gpub002:0/64] 2023-07-14 01:11:14,472 (trainer:386) INFO: The best model has been updated: valid.total_count +[gpub002:0/64] 2023-07-14 01:11:14,537 (trainer:440) INFO: The model files were removed: exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/41epoch.pth +[gpub002:0/64] 2023-07-14 01:11:14,537 (trainer:272) INFO: 47/50epoch started. Estimated time to finish: 20 hours, 29 minutes and 24.2 seconds +[gpub002:0/64] 2023-07-14 01:11:14,540 (multiple_iter_factory:32) INFO: Building 0th iter-factory... 
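The "Estimated time to finish" in the record above can be sanity-checked with back-of-the-envelope arithmetic from the "46epoch results" line: one epoch costs roughly the [train] + [valid] + [att_plot] wall time, and four epochs remain. A hedged sketch (the trainer's own estimate presumably uses its running average over earlier epochs, hence the small gap):

```python
from datetime import timedelta

# Times copied from the "46epoch results" line above (rounded to seconds).
epoch = (timedelta(hours=4, minutes=57, seconds=19)   # [train]
         + timedelta(minutes=8, seconds=21)           # [valid]
         + timedelta(minutes=5, seconds=50))          # [att_plot]
remaining_epochs = 50 - 46
print(remaining_epochs * epoch)  # 20:46:00 -- close to the logged ~20h29m
```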
+[gpub002:0/64] 2023-07-14 01:11:32,416 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-14 01:11:35,757 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-14 01:11:35,757 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4, +[gpub002:0/64] 2023-07-14 01:11:35,764 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-14 01:15:48,585 (trainer:732) INFO: 47epoch:train:1-100batch: iter_time=1.312, forward_time=0.182, loss_ctc=75.825, loss_att=61.109, acc=0.689, loss=65.524, backward_time=1.041, grad_norm=158.964, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.185, optim0_lr0=5.270e-05, train_time=5.481 +[gpub002:0/64] 2023-07-14 01:18:27,141 (trainer:732) INFO: 47epoch:train:101-200batch: iter_time=8.032e-04, forward_time=0.180, loss_ctc=64.299, loss_att=48.764, acc=0.701, loss=53.424, backward_time=1.052, grad_norm=127.674, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.184, optim0_lr0=5.270e-05, train_time=3.170 +[gpub002:0/64] 2023-07-14 01:20:57,571 (trainer:732) INFO: 47epoch:train:201-300batch: iter_time=1.242e-04, forward_time=0.156, loss_ctc=68.624, loss_att=50.174, acc=0.700, loss=55.709, backward_time=1.042, grad_norm=132.411, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.184, optim0_lr0=5.269e-05, train_time=3.010 +[gpub002:0/64] 2023-07-14 01:23:21,420 (trainer:732) INFO: 47epoch:train:301-400batch: iter_time=1.294e-04, forward_time=0.147, loss_ctc=73.786, loss_att=55.907, acc=0.690, loss=61.271, backward_time=1.039, grad_norm=144.858, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.269e-05, train_time=2.876 +[gpub002:0/64] 2023-07-14 01:25:56,177 (trainer:732) INFO: 47epoch:train:401-500batch: iter_time=1.221e-04, forward_time=0.145, loss_ctc=65.105, loss_att=48.139, acc=0.706, loss=53.229, backward_time=1.057, grad_norm=144.107, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.268e-05, train_time=3.096 +[gpub002:0/64] 2023-07-14 01:28:14,232 (trainer:732) INFO: 47epoch:train:501-600batch: iter_time=1.183e-04, forward_time=0.146, loss_ctc=71.910, loss_att=51.661, acc=0.724, loss=57.735, backward_time=1.032, grad_norm=145.246, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.267e-05, train_time=2.761 +[gpub002:0/64] 2023-07-14 01:30:34,563 (trainer:732) INFO: 47epoch:train:601-700batch: iter_time=1.124e-04, forward_time=0.143, loss_ctc=66.419, loss_att=46.645, acc=0.717, loss=52.577, backward_time=1.042, grad_norm=116.967, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.267e-05, train_time=2.806 +[gpub002:0/64] 2023-07-14 01:32:55,156 (trainer:732) INFO: 47epoch:train:701-800batch: iter_time=1.111e-04, forward_time=0.144, loss_ctc=75.585, loss_att=57.869, acc=0.701, loss=63.184, backward_time=1.031, grad_norm=132.340, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.266e-05, 
train_time=2.812 +[gpub002:0/64] 2023-07-14 01:33:50,705 (multiple_iter_factory:32) INFO: Building 1th iter-factory... +[gpub002:0/64] 2023-07-14 01:34:08,334 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-14 01:34:11,683 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-14 01:34:11,683 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7, +[gpub002:0/64] 2023-07-14 01:34:11,703 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-14 01:40:02,226 (trainer:732) INFO: 47epoch:train:801-900batch: iter_time=2.580, forward_time=0.174, loss_ctc=75.755, loss_att=60.417, acc=0.703, loss=65.018, backward_time=1.045, grad_norm=133.637, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.184, optim0_lr0=5.266e-05, train_time=8.541 +[gpub002:0/64] 2023-07-14 01:42:20,381 (trainer:732) INFO: 47epoch:train:901-1000batch: iter_time=1.239e-04, forward_time=0.146, loss_ctc=69.051, loss_att=52.908, acc=0.716, loss=57.751, backward_time=1.030, grad_norm=128.359, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.265e-05, train_time=2.763 +[gpub002:0/64] 2023-07-14 01:44:36,647 (trainer:732) INFO: 47epoch:train:1001-1100batch: iter_time=1.291e-04, forward_time=0.145, loss_ctc=67.260, loss_att=48.787, acc=0.713, loss=54.329, backward_time=1.033, grad_norm=133.083, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.264e-05, train_time=2.725 +[gpub002:0/64] 2023-07-14 01:46:52,658 (trainer:732) INFO: 47epoch:train:1101-1200batch: iter_time=1.208e-04, forward_time=0.146, loss_ctc=69.474, loss_att=52.446, acc=0.702, loss=57.555, backward_time=1.027, grad_norm=149.543, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.264e-05, train_time=2.720 +[gpub002:0/64] 2023-07-14 01:49:08,730 (trainer:732) INFO: 47epoch:train:1201-1300batch: iter_time=1.289e-04, forward_time=0.146, loss_ctc=63.276, loss_att=48.489, acc=0.718, loss=52.925, backward_time=1.028, grad_norm=139.425, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.263e-05, train_time=2.721 +[gpub002:0/64] 2023-07-14 01:51:24,328 (trainer:732) INFO: 47epoch:train:1301-1400batch: iter_time=1.570e-04, forward_time=0.145, loss_ctc=72.456, loss_att=49.418, acc=0.727, loss=56.329, backward_time=1.028, grad_norm=130.808, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.263e-05, train_time=2.712 +[gpub002:0/64] 2023-07-14 01:53:40,003 (trainer:732) INFO: 47epoch:train:1401-1500batch: iter_time=1.235e-04, forward_time=0.146, loss_ctc=63.452, loss_att=48.009, acc=0.722, loss=52.642, backward_time=1.027, grad_norm=121.157, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.262e-05, train_time=2.713 +[gpub002:0/64] 2023-07-14 01:55:56,022 (trainer:732) INFO: 47epoch:train:1501-1600batch: iter_time=1.239e-04, forward_time=0.146, loss_ctc=78.127, loss_att=57.946, acc=0.718, loss=64.000, 
backward_time=1.030, grad_norm=110.143, clip=100.000, loss_scale=5.906e+32, optim_step_time=0.183, optim0_lr0=5.262e-05, train_time=2.720 +[gpub002:0/64] 2023-07-14 01:57:27,328 (multiple_iter_factory:32) INFO: Building 2th iter-factory... +[gpub002:0/64] 2023-07-14 01:57:45,350 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-14 01:57:48,763 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-14 01:57:48,763 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5, +[gpub002:0/64] 2023-07-14 01:57:48,769 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-14 02:02:30,932 (trainer:732) INFO: 47epoch:train:1601-1700batch: iter_time=1.331, forward_time=0.182, loss_ctc=77.305, loss_att=58.961, acc=0.710, loss=64.464, backward_time=1.043, grad_norm=114.992, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.184, optim0_lr0=5.261e-05, train_time=7.898 +[gpub002:0/64] 2023-07-14 02:04:47,565 (trainer:732) INFO: 47epoch:train:1701-1800batch: iter_time=1.311e-04, forward_time=0.146, loss_ctc=68.654, loss_att=53.205, acc=0.712, loss=57.840, backward_time=1.031, grad_norm=151.268, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.183, optim0_lr0=5.260e-05, train_time=2.732 +[gpub002:0/64] 2023-07-14 02:07:03,513 (trainer:732) INFO: 47epoch:train:1801-1900batch: iter_time=1.282e-04, forward_time=0.146, loss_ctc=65.765, loss_att=46.697, acc=0.724, loss=52.417, backward_time=1.029, grad_norm=120.537, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.260e-05, train_time=2.719 +[gpub002:0/64] 2023-07-14 02:09:20,232 (trainer:732) INFO: 47epoch:train:1901-2000batch: iter_time=1.057e-04, forward_time=0.145, loss_ctc=67.723, loss_att=50.397, acc=0.716, loss=55.595, backward_time=1.030, grad_norm=131.389, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.259e-05, train_time=2.734 +[gpub002:0/64] 2023-07-14 02:11:36,110 (trainer:732) INFO: 47epoch:train:2001-2100batch: iter_time=1.112e-04, forward_time=0.145, loss_ctc=69.804, loss_att=52.306, acc=0.713, loss=57.555, backward_time=1.028, grad_norm=112.445, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.259e-05, train_time=2.717 +[gpub002:0/64] 2023-07-14 02:13:51,747 (trainer:732) INFO: 47epoch:train:2101-2200batch: iter_time=1.098e-04, forward_time=0.144, loss_ctc=69.056, loss_att=49.240, acc=0.712, loss=55.185, backward_time=1.025, grad_norm=113.889, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.258e-05, train_time=2.713 +[gpub002:0/64] 2023-07-14 02:16:07,448 (trainer:732) INFO: 47epoch:train:2201-2300batch: iter_time=1.067e-04, forward_time=0.144, loss_ctc=67.405, loss_att=49.335, acc=0.732, loss=54.756, backward_time=1.026, grad_norm=125.340, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.258e-05, train_time=2.714 +[gpub002:0/64] 2023-07-14 02:18:23,460 (trainer:732) INFO: 
47epoch:train:2301-2400batch: iter_time=1.052e-04, forward_time=0.145, loss_ctc=71.576, loss_att=54.166, acc=0.723, loss=59.389, backward_time=1.028, grad_norm=130.633, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.257e-05, train_time=2.720 +[gpub002:0/64] 2023-07-14 02:20:48,284 (multiple_iter_factory:32) INFO: Building 3th iter-factory... +[gpub002:0/64] 2023-07-14 02:21:06,460 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-14 02:21:09,867 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-14 02:21:09,868 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0, +[gpub002:0/64] 2023-07-14 02:21:09,874 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-14 02:27:11,938 (trainer:732) INFO: 47epoch:train:2401-2500batch: iter_time=1.320, forward_time=0.168, loss_ctc=72.393, loss_att=54.066, acc=0.713, loss=59.565, backward_time=1.066, grad_norm=115.635, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.183, optim0_lr0=5.256e-05, train_time=10.569 +[gpub002:0/64] 2023-07-14 02:29:29,559 (trainer:732) INFO: 47epoch:train:2501-2600batch: iter_time=1.496e-04, forward_time=0.146, loss_ctc=73.743, loss_att=59.618, acc=0.696, loss=63.855, backward_time=1.035, grad_norm=134.162, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.183, optim0_lr0=5.256e-05, train_time=2.752 +[gpub002:0/64] 2023-07-14 02:31:45,140 (trainer:732) INFO: 47epoch:train:2601-2700batch: iter_time=1.647e-04, forward_time=0.145, loss_ctc=63.864, loss_att=48.114, acc=0.703, loss=52.839, backward_time=1.028, grad_norm=120.583, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.183, optim0_lr0=5.255e-05, train_time=2.711 +[gpub002:0/64] 2023-07-14 02:34:00,714 (trainer:732) INFO: 47epoch:train:2701-2800batch: iter_time=1.460e-04, forward_time=0.146, loss_ctc=68.746, loss_att=49.931, acc=0.705, loss=55.575, backward_time=1.027, grad_norm=150.817, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.183, optim0_lr0=5.255e-05, train_time=2.711 +[gpub002:0/64] 2023-07-14 02:36:18,845 (trainer:732) INFO: 47epoch:train:2801-2900batch: iter_time=1.734e-04, forward_time=0.168, loss_ctc=70.481, loss_att=52.577, acc=0.699, loss=57.948, backward_time=1.029, grad_norm=134.419, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.184, optim0_lr0=5.254e-05, train_time=2.762 +[gpub002:0/64] 2023-07-14 02:38:42,459 (trainer:732) INFO: 47epoch:train:2901-3000batch: iter_time=1.060e-04, forward_time=0.144, loss_ctc=64.475, loss_att=47.169, acc=0.714, loss=52.361, backward_time=1.057, grad_norm=124.697, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.253e-05, train_time=2.872 +[gpub002:0/64] 2023-07-14 02:41:05,117 (trainer:732) INFO: 47epoch:train:3001-3100batch: iter_time=3.092e-04, forward_time=0.157, loss_ctc=72.537, loss_att=51.195, acc=0.726, loss=57.597, backward_time=1.039, grad_norm=137.411, clip=100.000, 
loss_scale=6.490e+32, optim_step_time=0.183, optim0_lr0=5.253e-05, train_time=2.853 +[gpub002:0/64] 2023-07-14 02:43:46,956 (trainer:732) INFO: 47epoch:train:3101-3200batch: iter_time=0.002, forward_time=0.194, loss_ctc=64.519, loss_att=46.475, acc=0.719, loss=51.888, backward_time=1.090, grad_norm=114.179, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.200, optim0_lr0=5.252e-05, train_time=3.236 +[gpub002:0/64] 2023-07-14 02:44:01,707 (trainer:663) WARNING: The grad norm is nan. Skipping updating the model. +[gpub002:0/64] 2023-07-14 02:46:11,080 (trainer:732) INFO: 47epoch:train:3201-3300batch: iter_time=1.368e-04, forward_time=0.146, loss_ctc=75.702, loss_att=56.674, acc=0.706, loss=62.383, backward_time=1.046, grad_norm=152.557, clip=100.000, loss_scale=3.510e+32, optim_step_time=0.183, optim0_lr0=5.252e-05, train_time=2.883 +[gpub002:0/64] 2023-07-14 02:47:14,725 (multiple_iter_factory:32) INFO: Building 4th iter-factory... +[gpub002:0/64] 2023-07-14 02:47:32,956 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-14 02:47:36,422 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-14 02:47:36,422 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11, +[gpub002:0/64] 2023-07-14 02:47:36,472 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-14 02:54:55,647 (trainer:732) INFO: 47epoch:train:3301-3400batch: iter_time=3.444, forward_time=0.146, loss_ctc=70.929, loss_att=56.270, acc=0.703, loss=60.668, backward_time=1.052, grad_norm=114.309, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.251e-05, train_time=10.491 +[gpub002:0/64] 2023-07-14 02:57:12,121 (trainer:732) INFO: 47epoch:train:3401-3500batch: iter_time=1.342e-04, forward_time=0.145, loss_ctc=63.963, loss_att=47.556, acc=0.723, loss=52.478, backward_time=1.031, grad_norm=101.174, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.251e-05, train_time=2.729 +[gpub002:0/64] 2023-07-14 02:59:28,257 (trainer:732) INFO: 47epoch:train:3501-3600batch: iter_time=1.333e-04, forward_time=0.147, loss_ctc=66.806, loss_att=50.043, acc=0.710, loss=55.072, backward_time=1.029, grad_norm=138.787, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.250e-05, train_time=2.722 +[gpub002:0/64] 2023-07-14 03:01:48,809 (trainer:732) INFO: 47epoch:train:3601-3700batch: iter_time=1.177e-04, forward_time=0.146, loss_ctc=71.928, loss_att=54.464, acc=0.712, loss=59.703, backward_time=1.036, grad_norm=200.177, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.249e-05, train_time=2.811 +[gpub002:0/64] 2023-07-14 03:04:07,361 (trainer:732) INFO: 47epoch:train:3701-3800batch: iter_time=1.198e-04, forward_time=0.147, loss_ctc=67.152, loss_att=46.952, acc=0.720, loss=53.012, backward_time=1.030, grad_norm=148.586, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.249e-05, train_time=2.771 
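Each iter-factory build above logs the same sampler summary: `UnsortedBatchSampler(N-batch=37994, batch_size=128, ...)` with `mean=128.0, min=128, max=129`. A hypothetical reconstruction of how a constant-size sampler over the shape file produces a few 129-utterance batches (a sketch under that assumption, not ESPnet's actual implementation):

```python
def constant_size_batches(keys, batch_size=128):
    """Group keys from the shape file, in file order, into
    N = len(keys) // batch_size batches, folding the remainder into the
    first batches -- giving min=128, max=129 and a mean that still
    rounds to 128.0, as in the summaries logged above."""
    n_batch = len(keys) // batch_size
    sizes = [batch_size] * n_batch
    for i in range(len(keys) - n_batch * batch_size):
        sizes[i % n_batch] += 1  # leftovers become batches of 129
    batches, start = [], 0
    for size in sizes:
        batches.append(keys[start:start + size])
        start += size
    return batches
```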
+[gpub002:0/64] 2023-07-14 03:06:28,698 (trainer:732) INFO: 47epoch:train:3801-3900batch: iter_time=1.379e-04, forward_time=0.145, loss_ctc=65.566, loss_att=47.542, acc=0.728, loss=52.949, backward_time=1.052, grad_norm=109.596, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.248e-05, train_time=2.827 +[gpub002:0/64] 2023-07-14 03:08:45,304 (trainer:732) INFO: 47epoch:train:3901-4000batch: iter_time=1.467e-04, forward_time=0.145, loss_ctc=70.262, loss_att=51.105, acc=0.728, loss=56.852, backward_time=1.027, grad_norm=130.886, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.248e-05, train_time=2.732 +[gpub002:0/64] 2023-07-14 03:11:01,331 (trainer:732) INFO: 47epoch:train:4001-4100batch: iter_time=1.377e-04, forward_time=0.146, loss_ctc=68.748, loss_att=49.916, acc=0.725, loss=55.566, backward_time=1.029, grad_norm=137.121, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.247e-05, train_time=2.720 +[gpub002:0/64] 2023-07-14 03:12:38,470 (multiple_iter_factory:32) INFO: Building 5th iter-factory... +[gpub002:0/64] 2023-07-14 03:12:56,794 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-14 03:13:00,241 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-14 03:13:00,241 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8, +[gpub002:0/64] 2023-07-14 03:13:00,247 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-14 03:19:05,776 (trainer:732) INFO: 47epoch:train:4101-4200batch: iter_time=1.352, forward_time=0.208, loss_ctc=74.165, loss_att=58.149, acc=0.700, loss=62.954, backward_time=1.172, grad_norm=121.785, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.187, optim0_lr0=5.247e-05, train_time=9.688 +[gpub002:0/64] 2023-07-14 03:21:21,928 (trainer:732) INFO: 47epoch:train:4201-4300batch: iter_time=1.398e-04, forward_time=0.145, loss_ctc=65.068, loss_att=49.135, acc=0.702, loss=53.915, backward_time=1.028, grad_norm=131.373, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.246e-05, train_time=2.723 +[gpub002:0/64] 2023-07-14 03:23:38,012 (trainer:732) INFO: 47epoch:train:4301-4400batch: iter_time=1.409e-04, forward_time=0.144, loss_ctc=67.892, loss_att=51.883, acc=0.698, loss=56.686, backward_time=1.029, grad_norm=124.443, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.245e-05, train_time=2.721 +[gpub002:0/64] 2023-07-14 03:25:53,885 (trainer:732) INFO: 47epoch:train:4401-4500batch: iter_time=1.313e-04, forward_time=0.144, loss_ctc=68.165, loss_att=48.760, acc=0.707, loss=54.582, backward_time=1.028, grad_norm=148.361, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.245e-05, train_time=2.717 +[gpub002:0/64] 2023-07-14 03:28:09,352 (trainer:732) INFO: 47epoch:train:4501-4600batch: iter_time=1.304e-04, forward_time=0.144, loss_ctc=62.490, loss_att=47.304, acc=0.713, loss=51.860, 
backward_time=1.026, grad_norm=114.356, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.244e-05, train_time=2.709 +[gpub002:0/64] 2023-07-14 03:30:25,107 (trainer:732) INFO: 47epoch:train:4601-4700batch: iter_time=1.325e-04, forward_time=0.144, loss_ctc=73.055, loss_att=52.955, acc=0.722, loss=58.985, backward_time=1.027, grad_norm=135.483, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.244e-05, train_time=2.715 +[gpub002:0/64] 2023-07-14 03:32:40,456 (trainer:732) INFO: 47epoch:train:4701-4800batch: iter_time=1.270e-04, forward_time=0.143, loss_ctc=64.160, loss_att=45.152, acc=0.720, loss=50.854, backward_time=1.025, grad_norm=152.783, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.243e-05, train_time=2.707 +[gpub002:0/64] 2023-07-14 03:34:55,883 (trainer:732) INFO: 47epoch:train:4801-4900batch: iter_time=1.281e-04, forward_time=0.144, loss_ctc=75.470, loss_att=57.125, acc=0.710, loss=62.628, backward_time=1.026, grad_norm=128.779, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.242e-05, train_time=2.708 +[gpub002:0/64] 2023-07-14 03:37:11,333 (trainer:732) INFO: 47epoch:train:4901-5000batch: iter_time=1.184e-04, forward_time=0.145, loss_ctc=68.637, loss_att=53.134, acc=0.699, loss=57.785, backward_time=1.027, grad_norm=135.549, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.242e-05, train_time=2.709 +[gpub002:0/64] 2023-07-14 03:37:13,747 (multiple_iter_factory:32) INFO: Building 6th iter-factory... +[gpub002:0/64] 2023-07-14 03:37:32,350 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-14 03:37:35,790 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-14 03:37:35,790 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2, +[gpub002:0/64] 2023-07-14 03:37:35,797 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-14 03:45:17,483 (trainer:732) INFO: 47epoch:train:5001-5100batch: iter_time=1.349, forward_time=0.238, loss_ctc=74.372, loss_att=58.913, acc=0.696, loss=63.550, backward_time=1.050, grad_norm=124.226, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.187, optim0_lr0=5.241e-05, train_time=9.723 +[gpub002:0/64] 2023-07-14 03:47:33,846 (trainer:732) INFO: 47epoch:train:5101-5200batch: iter_time=1.250e-04, forward_time=0.145, loss_ctc=63.535, loss_att=47.253, acc=0.709, loss=52.138, backward_time=1.028, grad_norm=146.127, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.241e-05, train_time=2.727 +[gpub002:0/64] 2023-07-14 03:49:56,309 (trainer:732) INFO: 47epoch:train:5201-5300batch: iter_time=1.404e-04, forward_time=0.145, loss_ctc=67.170, loss_att=48.352, acc=0.708, loss=53.997, backward_time=1.032, grad_norm=104.227, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.240e-05, train_time=2.849 +[gpub002:0/64] 2023-07-14 03:52:23,539 (trainer:732) INFO: 
47epoch:train:5301-5400batch: iter_time=1.378e-04, forward_time=0.147, loss_ctc=70.328, loss_att=52.677, acc=0.704, loss=57.973, backward_time=1.041, grad_norm=125.767, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.240e-05, train_time=2.944 +[gpub002:0/64] 2023-07-14 03:54:39,620 (trainer:732) INFO: 47epoch:train:5401-5500batch: iter_time=1.212e-04, forward_time=0.145, loss_ctc=63.753, loss_att=46.245, acc=0.719, loss=51.498, backward_time=1.030, grad_norm=101.661, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.239e-05, train_time=2.721 +[gpub002:0/64] 2023-07-14 03:56:55,460 (trainer:732) INFO: 47epoch:train:5501-5600batch: iter_time=1.463e-04, forward_time=0.145, loss_ctc=70.240, loss_att=51.409, acc=0.725, loss=57.058, backward_time=1.029, grad_norm=153.144, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.238e-05, train_time=2.717 +[gpub002:0/64] 2023-07-14 03:59:20,845 (trainer:732) INFO: 47epoch:train:5601-5700batch: iter_time=7.024e-04, forward_time=0.186, loss_ctc=63.760, loss_att=44.689, acc=0.728, loss=50.410, backward_time=1.053, grad_norm=104.609, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.238e-05, train_time=2.907 +[gpub002:0/64] 2023-07-14 04:01:56,667 (trainer:732) INFO: 47epoch:train:5701-5800batch: iter_time=1.300e-04, forward_time=0.145, loss_ctc=75.062, loss_att=56.213, acc=0.712, loss=61.868, backward_time=1.096, grad_norm=113.891, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.237e-05, train_time=3.117 +[gpub002:0/64] 2023-07-14 04:03:00,524 (multiple_iter_factory:32) INFO: Building 7th iter-factory... +[gpub002:0/64] 2023-07-14 04:03:18,672 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-14 04:03:22,119 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-14 04:03:22,120 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1, +[gpub002:0/64] 2023-07-14 04:03:22,126 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-14 04:08:19,987 (trainer:732) INFO: 47epoch:train:5801-5900batch: iter_time=2.282, forward_time=0.213, loss_ctc=75.092, loss_att=60.659, acc=0.705, loss=64.989, backward_time=1.059, grad_norm=119.456, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.185, optim0_lr0=5.237e-05, train_time=7.666 +[gpub002:0/64] 2023-07-14 04:10:36,391 (trainer:732) INFO: 47epoch:train:5901-6000batch: iter_time=1.312e-04, forward_time=0.146, loss_ctc=66.678, loss_att=50.374, acc=0.726, loss=55.266, backward_time=1.030, grad_norm=136.916, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.236e-05, train_time=2.728 +[gpub002:0/64] 2023-07-14 04:12:52,162 (trainer:732) INFO: 47epoch:train:6001-6100batch: iter_time=1.254e-04, forward_time=0.145, loss_ctc=65.131, loss_att=47.955, acc=0.721, loss=53.108, backward_time=1.026, grad_norm=129.089, clip=100.000, 
loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.236e-05, train_time=2.715 +[gpub002:0/64] 2023-07-14 04:15:07,834 (trainer:732) INFO: 47epoch:train:6101-6200batch: iter_time=1.218e-04, forward_time=0.145, loss_ctc=67.860, loss_att=50.576, acc=0.714, loss=55.761, backward_time=1.026, grad_norm=109.883, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.235e-05, train_time=2.713 +[gpub002:0/64] 2023-07-14 04:17:23,387 (trainer:732) INFO: 47epoch:train:6201-6300batch: iter_time=1.233e-04, forward_time=0.144, loss_ctc=61.925, loss_att=47.562, acc=0.718, loss=51.871, backward_time=1.027, grad_norm=111.806, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.234e-05, train_time=2.711 +[gpub002:0/64] 2023-07-14 04:19:46,670 (trainer:732) INFO: 47epoch:train:6301-6400batch: iter_time=0.005, forward_time=0.187, loss_ctc=70.970, loss_att=49.802, acc=0.728, loss=56.152, backward_time=1.043, grad_norm=134.690, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.188, optim0_lr0=5.234e-05, train_time=2.865 +[gpub002:0/64] 2023-07-14 04:22:02,533 (trainer:732) INFO: 47epoch:train:6401-6500batch: iter_time=1.219e-04, forward_time=0.145, loss_ctc=63.773, loss_att=48.033, acc=0.726, loss=52.755, backward_time=1.025, grad_norm=110.767, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.233e-05, train_time=2.718 +[gpub002:0/64] 2023-07-14 04:24:18,474 (trainer:732) INFO: 47epoch:train:6501-6600batch: iter_time=1.290e-04, forward_time=0.145, loss_ctc=76.393, loss_att=57.916, acc=0.722, loss=63.459, backward_time=1.028, grad_norm=126.533, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.233e-05, train_time=2.719 +[gpub002:0/64] 2023-07-14 04:25:50,082 (multiple_iter_factory:32) INFO: Building 8th iter-factory... 
+[gpub002:0/64] 2023-07-14 04:26:08,645 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-14 04:26:12,059 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-14 04:26:12,060 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6, +[gpub002:0/64] 2023-07-14 04:26:12,066 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-14 04:30:38,669 (trainer:732) INFO: 47epoch:train:6601-6700batch: iter_time=1.313, forward_time=0.145, loss_ctc=74.407, loss_att=56.874, acc=0.714, loss=62.134, backward_time=1.044, grad_norm=148.576, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.232e-05, train_time=7.604 +[gpub002:0/64] 2023-07-14 04:32:55,250 (trainer:732) INFO: 47epoch:train:6701-6800batch: iter_time=1.179e-04, forward_time=0.145, loss_ctc=66.947, loss_att=51.659, acc=0.708, loss=56.246, backward_time=1.029, grad_norm=104.621, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.232e-05, train_time=2.731 +[gpub002:0/64] 2023-07-14 04:35:11,755 (trainer:732) INFO: 47epoch:train:6801-6900batch: iter_time=1.132e-04, forward_time=0.145, loss_ctc=64.313, loss_att=47.002, acc=0.716, loss=52.196, backward_time=1.028, grad_norm=98.831, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.231e-05, train_time=2.730 +[gpub002:0/64] 2023-07-14 04:37:28,932 (trainer:732) INFO: 47epoch:train:6901-7000batch: iter_time=3.968e-04, forward_time=0.152, loss_ctc=66.847, loss_att=51.004, acc=0.707, loss=55.757, backward_time=1.032, grad_norm=99.645, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.230e-05, train_time=2.743 +[gpub002:0/64] 2023-07-14 04:39:53,468 (trainer:732) INFO: 47epoch:train:7001-7100batch: iter_time=1.192e-04, forward_time=0.143, loss_ctc=67.687, loss_att=49.843, acc=0.716, loss=55.196, backward_time=1.040, grad_norm=119.497, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.230e-05, train_time=2.891 +[gpub002:0/64] 2023-07-14 04:42:25,260 (trainer:732) INFO: 47epoch:train:7101-7200batch: iter_time=1.263e-04, forward_time=0.146, loss_ctc=68.017, loss_att=47.851, acc=0.713, loss=53.900, backward_time=1.067, grad_norm=115.656, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.229e-05, train_time=3.036 +[gpub002:0/64] 2023-07-14 04:44:41,100 (trainer:732) INFO: 47epoch:train:7201-7300batch: iter_time=1.187e-04, forward_time=0.146, loss_ctc=66.541, loss_att=47.903, acc=0.728, loss=53.495, backward_time=1.029, grad_norm=118.808, clip=100.000, loss_scale=6.166e+32, optim_step_time=0.183, optim0_lr0=5.229e-05, train_time=2.717 +[gpub002:0/64] 2023-07-14 04:46:57,499 (trainer:732) INFO: 47epoch:train:7301-7400batch: iter_time=1.337e-04, forward_time=0.146, loss_ctc=71.183, loss_att=51.940, acc=0.725, loss=57.713, backward_time=1.029, grad_norm=115.971, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.183, 
optim0_lr0=5.228e-05, train_time=2.728 +[gpub002:0/64] 2023-07-14 04:49:48,888 (multiple_iter_factory:32) INFO: Building 9th iter-factory... +[gpub002:0/64] 2023-07-14 04:50:07,256 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-14 04:50:10,689 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-14 04:50:10,689 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10, +[gpub002:0/64] 2023-07-14 04:50:10,710 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-14 04:54:02,831 (trainer:732) INFO: 47epoch:train:7401-7500batch: iter_time=2.585, forward_time=0.260, loss_ctc=71.936, loss_att=53.052, acc=0.710, loss=58.717, backward_time=1.061, grad_norm=137.288, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.188, optim0_lr0=5.228e-05, train_time=8.505 +[gpub002:0/64] 2023-07-14 04:56:20,415 (trainer:732) INFO: 47epoch:train:7501-7600batch: iter_time=1.183e-04, forward_time=0.146, loss_ctc=68.399, loss_att=53.056, acc=0.694, loss=57.658, backward_time=1.034, grad_norm=124.323, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.227e-05, train_time=2.753 +[gpub002:0/64] 2023-07-14 04:58:36,815 (trainer:732) INFO: 47epoch:train:7601-7700batch: iter_time=1.276e-04, forward_time=0.144, loss_ctc=67.099, loss_att=48.016, acc=0.713, loss=53.741, backward_time=1.027, grad_norm=111.682, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.226e-05, train_time=2.728 +[gpub002:0/64] 2023-07-14 05:00:53,216 (trainer:732) INFO: 47epoch:train:7701-7800batch: iter_time=1.241e-04, forward_time=0.144, loss_ctc=67.867, loss_att=52.326, acc=0.704, loss=56.988, backward_time=1.029, grad_norm=134.364, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.183, optim0_lr0=5.226e-05, train_time=2.728 +[gpub002:0/64] 2023-07-14 05:01:20,178 (trainer:663) WARNING: The grad norm is nan. Skipping updating the model. 
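The warning above, read together with the loss_scale column, matches standard dynamic loss scaling for mixed-precision training: the scale grows over runs of finite steps (3.245e+32 doubling to 6.490e+32 in the preceding windows) and is backed off once a non-finite gradient appears, with that step skipped. A sketch of such an update loop using PyTorch's GradScaler; ESPnet's trainer differs in detail, and model, batch, and optimizer are placeholders:

import torch

scaler = torch.cuda.amp.GradScaler(init_scale=2.0**16)

def train_step(model, batch, optimizer, max_norm=100.0):
    with torch.cuda.amp.autocast():
        loss = model(**batch)["loss"]
    scaler.scale(loss).backward()
    scaler.unscale_(optimizer)  # bring gradients back to fp32 units
    grad_norm = torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm)
    if torch.isfinite(grad_norm):
        scaler.step(optimizer)  # on nan/inf the update is skipped instead,
                                # as in the warning above
    scaler.update()             # backoff (x0.5) after an overflow; periodic
                                # growth (x2) otherwise
    optimizer.zero_grad()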
+[gpub002:0/64] 2023-07-14 05:03:08,612 (trainer:732) INFO: 47epoch:train:7801-7900batch: iter_time=1.185e-04, forward_time=0.144, loss_ctc=63.118, loss_att=45.662, acc=0.720, loss=50.899, backward_time=1.027, grad_norm=101.710, clip=100.000, loss_scale=3.841e+32, optim_step_time=0.182, optim0_lr0=5.225e-05, train_time=2.708 +[gpub002:0/64] 2023-07-14 05:05:24,239 (trainer:732) INFO: 47epoch:train:7901-8000batch: iter_time=1.352e-04, forward_time=0.145, loss_ctc=70.809, loss_att=49.978, acc=0.711, loss=56.227, backward_time=1.028, grad_norm=119.098, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.225e-05, train_time=2.712 +[gpub002:0/64] 2023-07-14 05:07:39,760 (trainer:732) INFO: 47epoch:train:8001-8100batch: iter_time=1.291e-04, forward_time=0.145, loss_ctc=65.678, loss_att=48.315, acc=0.720, loss=53.524, backward_time=1.028, grad_norm=121.079, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.224e-05, train_time=2.710 +[gpub002:0/64] 2023-07-14 05:09:55,413 (trainer:732) INFO: 47epoch:train:8101-8200batch: iter_time=1.275e-04, forward_time=0.146, loss_ctc=72.102, loss_att=52.782, acc=0.728, loss=58.578, backward_time=1.027, grad_norm=124.500, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.224e-05, train_time=2.713 +[gpub002:0/64] 2023-07-14 05:12:11,383 (trainer:732) INFO: 47epoch:train:8201-8300batch: iter_time=1.328e-04, forward_time=0.146, loss_ctc=70.788, loss_att=52.774, acc=0.710, loss=58.178, backward_time=1.031, grad_norm=115.453, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.223e-05, train_time=2.719 +[gpub002:0/64] 2023-07-14 05:12:57,530 (multiple_iter_factory:32) INFO: Building 10th iter-factory... +[gpub002:0/64] 2023-07-14 05:13:15,615 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-14 05:13:19,325 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-14 05:13:19,325 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9, +[gpub002:0/64] 2023-07-14 05:13:19,331 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-14 05:19:34,885 (trainer:732) INFO: 47epoch:train:8301-8400batch: iter_time=1.320, forward_time=0.157, loss_ctc=69.342, loss_att=56.350, acc=0.696, loss=60.247, backward_time=1.040, grad_norm=126.658, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.222e-05, train_time=8.870 +[gpub002:0/64] 2023-07-14 05:21:52,367 (trainer:732) INFO: 47epoch:train:8401-8500batch: iter_time=1.110e-04, forward_time=0.145, loss_ctc=63.969, loss_att=47.931, acc=0.724, loss=52.742, backward_time=1.031, grad_norm=107.000, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.222e-05, train_time=2.749 +[gpub002:0/64] 2023-07-14 05:24:08,298 (trainer:732) INFO: 47epoch:train:8501-8600batch: iter_time=1.100e-04, forward_time=0.144, loss_ctc=65.267, loss_att=48.804, acc=0.713, loss=53.743, 
backward_time=1.027, grad_norm=121.119, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.221e-05, train_time=2.718 +[gpub002:0/64] 2023-07-14 05:26:24,844 (trainer:732) INFO: 47epoch:train:8601-8700batch: iter_time=1.158e-04, forward_time=0.144, loss_ctc=72.559, loss_att=55.089, acc=0.712, loss=60.330, backward_time=1.029, grad_norm=122.276, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.221e-05, train_time=2.731 +[gpub002:0/64] 2023-07-14 05:28:40,559 (trainer:732) INFO: 47epoch:train:8701-8800batch: iter_time=1.175e-04, forward_time=0.145, loss_ctc=65.539, loss_att=47.000, acc=0.719, loss=52.562, backward_time=1.027, grad_norm=122.552, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.220e-05, train_time=2.714 +[gpub002:0/64] 2023-07-14 05:30:56,463 (trainer:732) INFO: 47epoch:train:8801-8900batch: iter_time=1.200e-04, forward_time=0.145, loss_ctc=66.154, loss_att=47.927, acc=0.733, loss=53.395, backward_time=1.029, grad_norm=129.162, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.220e-05, train_time=2.718 +[gpub002:0/64] 2023-07-14 05:33:12,008 (trainer:732) INFO: 47epoch:train:8901-9000batch: iter_time=1.259e-04, forward_time=0.145, loss_ctc=71.851, loss_att=52.837, acc=0.726, loss=58.541, backward_time=1.027, grad_norm=117.162, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.219e-05, train_time=2.711 +[gpub002:0/64] 2023-07-14 05:35:27,593 (trainer:732) INFO: 47epoch:train:9001-9100batch: iter_time=1.289e-04, forward_time=0.146, loss_ctc=69.806, loss_att=51.495, acc=0.721, loss=56.989, backward_time=1.027, grad_norm=118.907, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.218e-05, train_time=2.711 +[gpub002:0/64] 2023-07-14 05:37:02,534 (multiple_iter_factory:32) INFO: Building 11th iter-factory... 
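Each [train] summary in this log reports N-batch=37994 with mini-batch sizes between min=128 and max=129 and a mean of 128.0. That pattern is what falls out of cutting the key file into a fixed number of contiguous batches and spreading the remainder over the first batches instead of emitting a short tail. An illustrative sketch, not ESPnet's UnsortedBatchSampler itself, with n_batch taken as given:

def spread_batches(keys, n_batch):
    # Split keys (kept in key-file order, no sorting by length) into
    # n_batch contiguous batches whose sizes differ by at most one.
    base, extra = divmod(len(keys), n_batch)   # here: base=128, small extra
    batches, start = [], 0
    for i in range(n_batch):
        size = base + (1 if i < extra else 0)  # 128 or 129 -> mean ~128.0
        batches.append(keys[start:start + size])
        start += size
    return batches

With n_batch=37994 this reproduces the logged size summary for a split of roughly 4.86M utterance keys.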
+[gpub002:0/64] 2023-07-14 05:37:20,733 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-14 05:37:24,243 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-14 05:37:24,243 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3, +[gpub002:0/64] 2023-07-14 05:37:24,250 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-14 05:41:19,308 (trainer:732) INFO: 47epoch:train:9101-9200batch: iter_time=1.370, forward_time=0.154, loss_ctc=73.500, loss_att=59.056, acc=0.706, loss=63.389, backward_time=1.043, grad_norm=129.829, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.218e-05, train_time=7.034 +[gpub002:0/64] 2023-07-14 05:43:35,917 (trainer:732) INFO: 47epoch:train:9201-9300batch: iter_time=1.274e-04, forward_time=0.145, loss_ctc=66.338, loss_att=51.966, acc=0.717, loss=56.278, backward_time=1.031, grad_norm=120.789, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.217e-05, train_time=2.732 +[gpub002:0/64] 2023-07-14 05:45:52,711 (trainer:732) INFO: 47epoch:train:9301-9400batch: iter_time=1.314e-04, forward_time=0.146, loss_ctc=64.912, loss_att=46.474, acc=0.726, loss=52.005, backward_time=1.029, grad_norm=133.594, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.217e-05, train_time=2.736 +[gpub002:0/64] 2023-07-14 05:48:13,955 (trainer:732) INFO: 47epoch:train:9401-9500batch: iter_time=1.413e-04, forward_time=0.191, loss_ctc=67.262, loss_att=50.534, acc=0.717, loss=55.552, backward_time=1.033, grad_norm=131.021, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.186, optim0_lr0=5.216e-05, train_time=2.825 +[gpub002:0/64] 2023-07-14 05:50:43,138 (trainer:732) INFO: 47epoch:train:9501-9600batch: iter_time=0.001, forward_time=0.230, loss_ctc=68.713, loss_att=50.464, acc=0.725, loss=55.939, backward_time=1.041, grad_norm=113.311, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.200, optim0_lr0=5.216e-05, train_time=2.981 +[gpub002:0/64] 2023-07-14 05:53:04,340 (trainer:732) INFO: 47epoch:train:9601-9700batch: iter_time=1.376e-04, forward_time=0.146, loss_ctc=67.075, loss_att=47.370, acc=0.719, loss=53.282, backward_time=1.032, grad_norm=122.282, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.215e-05, train_time=2.826 +[gpub002:0/64] 2023-07-14 05:55:20,063 (trainer:732) INFO: 47epoch:train:9701-9800batch: iter_time=1.301e-04, forward_time=0.145, loss_ctc=67.510, loss_att=48.979, acc=0.734, loss=54.538, backward_time=1.028, grad_norm=124.005, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.214e-05, train_time=2.714 +[gpub002:0/64] 2023-07-14 05:57:35,588 (trainer:732) INFO: 47epoch:train:9801-9900batch: iter_time=1.115e-04, forward_time=0.143, loss_ctc=69.885, loss_att=51.819, acc=0.728, loss=57.239, backward_time=1.028, grad_norm=112.202, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, 
optim0_lr0=5.214e-05, train_time=2.710 +[gpub002:0/64] 2023-07-14 05:59:51,428 (trainer:732) INFO: 47epoch:train:9901-10000batch: iter_time=9.828e-05, forward_time=0.144, loss_ctc=71.596, loss_att=53.648, acc=0.720, loss=59.033, backward_time=1.029, grad_norm=136.522, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.213e-05, train_time=2.717 +[gpub002:0/64] 2023-07-14 06:14:05,194 (trainer:338) INFO: 47epoch results: [train] iter_time=0.216, forward_time=0.153, loss_ctc=68.897, loss_att=51.327, acc=0.714, loss=56.598, backward_time=1.037, grad_norm=126.595, clip=100.000, loss_scale=3.991e+32, optim_step_time=0.183, optim0_lr0=5.242e-05, train_time=3.463, time=4 hours, 48 minutes and 59.64 seconds, total_count=440000, gpu_max_cached_mem_GB=37.574, [valid] loss_ctc=43.333, cer_ctc=0.254, loss_att=36.942, acc=0.674, cer=0.429, wer=0.998, loss=38.859, time=7 minutes and 55.45 seconds, total_count=45034, gpu_max_cached_mem_GB=37.574, [att_plot] time=5 minutes and 55.55 seconds, total_count=0, gpu_max_cached_mem_GB=37.574 +[gpub002:0/64] 2023-07-14 06:14:21,862 (trainer:386) INFO: The best model has been updated: valid.total_count +[gpub002:0/64] 2023-07-14 06:14:21,871 (trainer:272) INFO: 48/50epoch started. Estimated time to finish: 15 hours, 20 minutes and 28.01 seconds +[gpub002:0/64] 2023-07-14 06:14:22,214 (multiple_iter_factory:32) INFO: Building 0th iter-factory... +[gpub002:0/64] 2023-07-14 06:14:41,401 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-14 06:14:44,825 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-14 06:14:44,825 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10, +[gpub002:0/64] 2023-07-14 06:14:44,832 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-14 06:25:13,027 (trainer:732) INFO: 48epoch:train:1-100batch: iter_time=5.066, forward_time=0.185, loss_ctc=72.566, loss_att=50.721, acc=0.708, loss=57.275, backward_time=1.043, grad_norm=138.002, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.198, optim0_lr0=5.213e-05, train_time=13.017 +[gpub002:0/64] 2023-07-14 06:27:29,352 (trainer:732) INFO: 48epoch:train:101-200batch: iter_time=1.335e-04, forward_time=0.145, loss_ctc=77.852, loss_att=63.076, acc=0.696, loss=67.509, backward_time=1.031, grad_norm=128.219, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.212e-05, train_time=2.726 +[gpub002:0/64] 2023-07-14 06:29:52,948 (trainer:732) INFO: 48epoch:train:201-300batch: iter_time=1.227e-04, forward_time=0.144, loss_ctc=74.900, loss_att=50.312, acc=0.726, loss=57.688, backward_time=1.027, grad_norm=135.033, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.212e-05, train_time=2.872 +[gpub002:0/64] 2023-07-14 06:32:08,576 (trainer:732) INFO: 48epoch:train:301-400batch: iter_time=1.240e-04, forward_time=0.144, loss_ctc=76.914, loss_att=54.854, acc=0.698, loss=61.472, 
backward_time=1.026, grad_norm=140.589, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.211e-05, train_time=2.712 +[gpub002:0/64] 2023-07-14 06:34:24,747 (trainer:732) INFO: 48epoch:train:401-500batch: iter_time=1.252e-04, forward_time=0.143, loss_ctc=64.489, loss_att=50.286, acc=0.704, loss=54.546, backward_time=1.025, grad_norm=120.535, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.210e-05, train_time=2.723 +[gpub002:0/64] 2023-07-14 06:36:44,732 (trainer:732) INFO: 48epoch:train:501-600batch: iter_time=1.248e-04, forward_time=0.145, loss_ctc=72.098, loss_att=53.893, acc=0.695, loss=59.355, backward_time=1.026, grad_norm=121.753, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.210e-05, train_time=2.799 +[gpub002:0/64] 2023-07-14 06:39:10,733 (trainer:732) INFO: 48epoch:train:601-700batch: iter_time=0.004, forward_time=0.187, loss_ctc=68.179, loss_att=50.186, acc=0.717, loss=55.584, backward_time=1.038, grad_norm=119.134, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.209e-05, train_time=2.919 +[gpub002:0/64] 2023-07-14 06:41:44,082 (trainer:732) INFO: 48epoch:train:701-800batch: iter_time=1.232e-04, forward_time=0.239, loss_ctc=72.896, loss_att=50.389, acc=0.701, loss=57.141, backward_time=1.045, grad_norm=117.642, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.188, optim0_lr0=5.209e-05, train_time=3.068 +[gpub002:0/64] 2023-07-14 06:42:40,091 (multiple_iter_factory:32) INFO: Building 1th iter-factory... +[gpub002:0/64] 2023-07-14 06:42:57,858 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-14 06:43:01,178 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-14 06:43:01,178 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11, +[gpub002:0/64] 2023-07-14 06:43:01,184 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-14 06:47:36,689 (trainer:732) INFO: 48epoch:train:801-900batch: iter_time=1.777, forward_time=0.193, loss_ctc=75.968, loss_att=53.040, acc=0.714, loss=59.919, backward_time=1.043, grad_norm=123.686, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.186, optim0_lr0=5.208e-05, train_time=7.051 +[gpub002:0/64] 2023-07-14 06:50:07,555 (trainer:732) INFO: 48epoch:train:901-1000batch: iter_time=1.021e-04, forward_time=0.146, loss_ctc=80.428, loss_att=59.359, acc=0.718, loss=65.679, backward_time=1.046, grad_norm=134.626, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.208e-05, train_time=3.018 +[gpub002:0/64] 2023-07-14 06:52:23,798 (trainer:732) INFO: 48epoch:train:1001-1100batch: iter_time=1.193e-04, forward_time=0.144, loss_ctc=74.892, loss_att=50.678, acc=0.724, loss=57.942, backward_time=1.031, grad_norm=128.411, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.207e-05, train_time=2.725 +[gpub002:0/64] 2023-07-14 06:54:40,151 (trainer:732) INFO: 
48epoch:train:1101-1200batch: iter_time=1.110e-04, forward_time=0.143, loss_ctc=74.451, loss_att=54.112, acc=0.722, loss=60.214, backward_time=1.030, grad_norm=147.532, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.207e-05, train_time=2.727 +[gpub002:0/64] 2023-07-14 06:56:57,546 (trainer:732) INFO: 48epoch:train:1201-1300batch: iter_time=1.023e-04, forward_time=0.144, loss_ctc=63.785, loss_att=48.558, acc=0.713, loss=53.126, backward_time=1.031, grad_norm=111.642, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.206e-05, train_time=2.748 +[gpub002:0/64] 2023-07-14 06:59:13,500 (trainer:732) INFO: 48epoch:train:1301-1400batch: iter_time=1.072e-04, forward_time=0.144, loss_ctc=72.696, loss_att=53.103, acc=0.718, loss=58.981, backward_time=1.030, grad_norm=130.057, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.205e-05, train_time=2.719 +[gpub002:0/64] 2023-07-14 07:01:29,121 (trainer:732) INFO: 48epoch:train:1401-1500batch: iter_time=1.109e-04, forward_time=0.143, loss_ctc=68.627, loss_att=48.296, acc=0.723, loss=54.395, backward_time=1.027, grad_norm=116.485, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.205e-05, train_time=2.712 +[gpub002:0/64] 2023-07-14 07:03:53,791 (trainer:732) INFO: 48epoch:train:1501-1600batch: iter_time=8.219e-04, forward_time=0.204, loss_ctc=67.797, loss_att=49.570, acc=0.714, loss=55.038, backward_time=1.040, grad_norm=129.227, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.196, optim0_lr0=5.204e-05, train_time=2.893 +[gpub002:0/64] 2023-07-14 07:05:50,238 (multiple_iter_factory:32) INFO: Building 2th iter-factory... +[gpub002:0/64] 2023-07-14 07:06:08,419 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-14 07:06:11,812 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-14 07:06:11,812 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5, +[gpub002:0/64] 2023-07-14 07:06:11,818 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-14 07:11:36,453 (trainer:732) INFO: 48epoch:train:1601-1700batch: iter_time=3.165, forward_time=0.201, loss_ctc=77.679, loss_att=56.373, acc=0.700, loss=62.765, backward_time=1.041, grad_norm=134.671, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.184, optim0_lr0=5.204e-05, train_time=9.252 +[gpub002:0/64] 2023-07-14 07:13:53,755 (trainer:732) INFO: 48epoch:train:1701-1800batch: iter_time=1.167e-04, forward_time=0.147, loss_ctc=67.651, loss_att=49.981, acc=0.716, loss=55.282, backward_time=1.033, grad_norm=139.985, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.203e-05, train_time=2.747 +[gpub002:0/64] 2023-07-14 07:16:10,215 (trainer:732) INFO: 48epoch:train:1801-1900batch: iter_time=1.174e-04, forward_time=0.146, loss_ctc=81.909, loss_att=60.376, acc=0.710, loss=66.836, backward_time=1.031, grad_norm=150.019, clip=100.000, 
loss_scale=5.841e+32, optim_step_time=0.182, optim0_lr0=5.203e-05, train_time=2.729 +[gpub002:0/64] 2023-07-14 07:18:26,045 (trainer:732) INFO: 48epoch:train:1901-2000batch: iter_time=9.725e-05, forward_time=0.145, loss_ctc=72.694, loss_att=50.879, acc=0.730, loss=57.424, backward_time=1.029, grad_norm=100.349, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.202e-05, train_time=2.716 +[gpub002:0/64] 2023-07-14 07:20:41,800 (trainer:732) INFO: 48epoch:train:2001-2100batch: iter_time=9.471e-05, forward_time=0.144, loss_ctc=75.595, loss_att=54.872, acc=0.713, loss=61.089, backward_time=1.028, grad_norm=135.140, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.201e-05, train_time=2.715 +[gpub002:0/64] 2023-07-14 07:22:57,479 (trainer:732) INFO: 48epoch:train:2101-2200batch: iter_time=8.940e-05, forward_time=0.144, loss_ctc=63.218, loss_att=46.176, acc=0.721, loss=51.288, backward_time=1.029, grad_norm=166.606, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.201e-05, train_time=2.713 +[gpub002:0/64] 2023-07-14 07:25:13,488 (trainer:732) INFO: 48epoch:train:2201-2300batch: iter_time=9.836e-05, forward_time=0.146, loss_ctc=70.710, loss_att=53.282, acc=0.717, loss=58.510, backward_time=1.031, grad_norm=133.467, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.200e-05, train_time=2.720 +[gpub002:0/64] 2023-07-14 07:27:29,318 (trainer:732) INFO: 48epoch:train:2301-2400batch: iter_time=9.628e-05, forward_time=0.144, loss_ctc=69.445, loss_att=49.286, acc=0.727, loss=55.334, backward_time=1.029, grad_norm=133.909, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.200e-05, train_time=2.716 +[gpub002:0/64] 2023-07-14 07:28:58,595 (trainer:663) WARNING: The grad norm is nan. Skipping updating the model. +[gpub002:0/64] 2023-07-14 07:29:44,718 (trainer:732) INFO: 48epoch:train:2401-2500batch: iter_time=9.391e-05, forward_time=0.144, loss_ctc=69.277, loss_att=52.038, acc=0.706, loss=57.209, backward_time=1.028, grad_norm=129.891, clip=100.000, loss_scale=5.364e+32, optim_step_time=0.182, optim0_lr0=5.199e-05, train_time=2.708 +[gpub002:0/64] 2023-07-14 07:30:01,213 (multiple_iter_factory:32) INFO: Building 3th iter-factory... 
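The per-window trainer lines are regular enough to scrape into a table for plotting loss, grad_norm, or loss_scale over time. A hypothetical helper, not part of ESPnet, that pulls the numeric fields out of one record:

import re

PAT = re.compile(r"(?P<epoch>\d+)epoch:train:(?P<win>\d+-\d+)batch: (?P<kv>.*)")

def parse_record(line):
    m = PAT.search(line)
    if m is None:
        return None
    metrics = {"epoch": int(m.group("epoch")), "window": m.group("win")}
    for pair in m.group("kv").split(", "):
        if "=" in pair:
            key, val = pair.split("=", 1)
            try:
                metrics[key] = float(val)
            except ValueError:
                pass  # skip non-numeric tails
    return metrics

row = parse_record(
    "47epoch:train:7901-8000batch: iter_time=1.352e-04, forward_time=0.145, "
    "loss_ctc=70.809, loss_att=49.978, acc=0.711, loss=56.227"
)
assert row is not None and row["loss_att"] == 49.978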
+[gpub002:0/64] 2023-07-14 07:30:19,332 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-14 07:30:22,768 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-14 07:30:22,768 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6, +[gpub002:0/64] 2023-07-14 07:30:22,775 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-14 07:37:23,008 (trainer:732) INFO: 48epoch:train:2501-2600batch: iter_time=3.157, forward_time=0.175, loss_ctc=64.931, loss_att=49.988, acc=0.701, loss=54.471, backward_time=1.040, grad_norm=125.113, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.185, optim0_lr0=5.199e-05, train_time=9.166 +[gpub002:0/64] 2023-07-14 07:39:39,101 (trainer:732) INFO: 48epoch:train:2601-2700batch: iter_time=9.689e-05, forward_time=0.144, loss_ctc=77.176, loss_att=57.787, acc=0.708, loss=63.604, backward_time=1.027, grad_norm=129.649, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.198e-05, train_time=2.722 +[gpub002:0/64] 2023-07-14 07:41:55,336 (trainer:732) INFO: 48epoch:train:2701-2800batch: iter_time=1.280e-04, forward_time=0.147, loss_ctc=78.705, loss_att=57.307, acc=0.716, loss=63.727, backward_time=1.031, grad_norm=161.522, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.198e-05, train_time=2.724 +[gpub002:0/64] 2023-07-14 07:44:11,382 (trainer:732) INFO: 48epoch:train:2801-2900batch: iter_time=1.355e-04, forward_time=0.146, loss_ctc=71.666, loss_att=51.698, acc=0.722, loss=57.689, backward_time=1.029, grad_norm=143.465, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.197e-05, train_time=2.721 +[gpub002:0/64] 2023-07-14 07:46:27,152 (trainer:732) INFO: 48epoch:train:2901-3000batch: iter_time=1.246e-04, forward_time=0.147, loss_ctc=67.826, loss_att=52.816, acc=0.696, loss=57.319, backward_time=1.028, grad_norm=112.690, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.196e-05, train_time=2.715 +[gpub002:0/64] 2023-07-14 07:48:42,648 (trainer:732) INFO: 48epoch:train:3001-3100batch: iter_time=1.373e-04, forward_time=0.146, loss_ctc=62.490, loss_att=46.664, acc=0.714, loss=51.412, backward_time=1.026, grad_norm=110.321, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.196e-05, train_time=2.710 +[gpub002:0/64] 2023-07-14 07:50:58,216 (trainer:732) INFO: 48epoch:train:3101-3200batch: iter_time=1.433e-04, forward_time=0.147, loss_ctc=71.138, loss_att=53.409, acc=0.703, loss=58.728, backward_time=1.027, grad_norm=124.701, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.195e-05, train_time=2.711 +[gpub002:0/64] 2023-07-14 07:53:13,640 (trainer:732) INFO: 48epoch:train:3201-3300batch: iter_time=1.522e-04, forward_time=0.145, loss_ctc=70.454, loss_att=51.309, acc=0.718, loss=57.052, backward_time=1.025, grad_norm=137.045, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, 
optim0_lr0=5.195e-05, train_time=2.708 +[gpub002:0/64] 2023-07-14 07:54:01,692 (multiple_iter_factory:32) INFO: Building 4th iter-factory... +[gpub002:0/64] 2023-07-14 07:54:19,955 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-14 07:54:23,377 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-14 07:54:23,377 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2, +[gpub002:0/64] 2023-07-14 07:54:23,383 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-14 07:59:50,081 (trainer:732) INFO: 48epoch:train:3301-3400batch: iter_time=1.291, forward_time=0.146, loss_ctc=67.251, loss_att=52.714, acc=0.689, loss=57.075, backward_time=1.040, grad_norm=162.610, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.194e-05, train_time=7.929 +[gpub002:0/64] 2023-07-14 08:02:06,659 (trainer:732) INFO: 48epoch:train:3401-3500batch: iter_time=1.254e-04, forward_time=0.147, loss_ctc=74.470, loss_att=50.103, acc=0.717, loss=57.413, backward_time=1.030, grad_norm=146.633, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.194e-05, train_time=2.731 +[gpub002:0/64] 2023-07-14 08:04:22,707 (trainer:732) INFO: 48epoch:train:3501-3600batch: iter_time=1.132e-04, forward_time=0.144, loss_ctc=78.766, loss_att=59.983, acc=0.709, loss=65.618, backward_time=1.031, grad_norm=139.024, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.193e-05, train_time=2.721 +[gpub002:0/64] 2023-07-14 08:06:38,451 (trainer:732) INFO: 48epoch:train:3601-3700batch: iter_time=9.308e-05, forward_time=0.144, loss_ctc=69.199, loss_att=49.395, acc=0.729, loss=55.336, backward_time=1.029, grad_norm=127.650, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.192e-05, train_time=2.715 +[gpub002:0/64] 2023-07-14 08:08:54,104 (trainer:732) INFO: 48epoch:train:3701-3800batch: iter_time=1.209e-04, forward_time=0.144, loss_ctc=71.796, loss_att=51.842, acc=0.698, loss=57.828, backward_time=1.028, grad_norm=149.795, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.192e-05, train_time=2.713 +[gpub002:0/64] 2023-07-14 08:09:50,838 (trainer:663) WARNING: The grad norm is nan. Skipping updating the model. 
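The grad_norm reported in these records (and the value that goes nan in the warning above) is the norm computed at clipping time, and it routinely exceeds the configured clip=100.000. That is the expected behaviour when the norm is measured before clipping: torch.nn.utils.clip_grad_norm_ returns the pre-clip total norm and then rescales the gradients in place down to max_norm. A small self-contained demonstration of that semantics, assuming this is the clipping in use, which the clip column suggests:

import torch

p = torch.nn.Parameter(torch.ones(4))
p.grad = torch.full((4,), 60.0)        # total L2 norm = 120
norm = torch.nn.utils.clip_grad_norm_([p], max_norm=100.0)
print(float(norm))                     # 120.0 > 100, like grad_norm in the log
print(float(p.grad.norm()))            # ~100.0 after the in-place rescale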
+[gpub002:0/64] 2023-07-14 08:11:09,674 (trainer:732) INFO: 48epoch:train:3801-3900batch: iter_time=1.016e-04, forward_time=0.144, loss_ctc=66.375, loss_att=50.166, acc=0.713, loss=55.029, backward_time=1.028, grad_norm=168.422, clip=100.000, loss_scale=2.285e+32, optim_step_time=0.182, optim0_lr0=5.191e-05, train_time=2.711 +[gpub002:0/64] 2023-07-14 08:13:25,076 (trainer:732) INFO: 48epoch:train:3901-4000batch: iter_time=1.125e-04, forward_time=0.143, loss_ctc=72.317, loss_att=55.151, acc=0.699, loss=60.301, backward_time=1.027, grad_norm=131.187, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.191e-05, train_time=2.708 +[gpub002:0/64] 2023-07-14 08:15:40,836 (trainer:732) INFO: 48epoch:train:4001-4100batch: iter_time=1.123e-04, forward_time=0.145, loss_ctc=69.182, loss_att=49.859, acc=0.719, loss=55.656, backward_time=1.029, grad_norm=144.542, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.190e-05, train_time=2.715 +[gpub002:0/64] 2023-07-14 08:17:43,061 (multiple_iter_factory:32) INFO: Building 5th iter-factory... +[gpub002:0/64] 2023-07-14 08:18:01,046 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-14 08:18:04,458 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-14 08:18:04,458 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9, +[gpub002:0/64] 2023-07-14 08:18:04,464 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-14 08:23:10,162 (trainer:732) INFO: 48epoch:train:4101-4200batch: iter_time=2.902, forward_time=0.247, loss_ctc=68.952, loss_att=51.746, acc=0.708, loss=56.908, backward_time=1.052, grad_norm=116.379, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.220, optim0_lr0=5.190e-05, train_time=8.986 +[gpub002:0/64] 2023-07-14 08:25:28,112 (trainer:732) INFO: 48epoch:train:4201-4300batch: iter_time=1.154e-04, forward_time=0.148, loss_ctc=75.515, loss_att=51.753, acc=0.724, loss=58.882, backward_time=1.034, grad_norm=126.570, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.189e-05, train_time=2.759 +[gpub002:0/64] 2023-07-14 08:27:44,220 (trainer:732) INFO: 48epoch:train:4301-4400batch: iter_time=1.206e-04, forward_time=0.146, loss_ctc=74.155, loss_att=55.853, acc=0.722, loss=61.344, backward_time=1.031, grad_norm=127.353, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.189e-05, train_time=2.722 +[gpub002:0/64] 2023-07-14 08:30:00,444 (trainer:732) INFO: 48epoch:train:4401-4500batch: iter_time=1.105e-04, forward_time=0.146, loss_ctc=74.196, loss_att=52.771, acc=0.728, loss=59.198, backward_time=1.032, grad_norm=142.333, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.188e-05, train_time=2.724 +[gpub002:0/64] 2023-07-14 08:32:16,273 (trainer:732) INFO: 48epoch:train:4501-4600batch: iter_time=1.142e-04, forward_time=0.145, loss_ctc=69.251, loss_att=50.837, acc=0.715, loss=56.361, 
backward_time=1.029, grad_norm=131.558, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.187e-05, train_time=2.716 +[gpub002:0/64] 2023-07-14 08:34:31,949 (trainer:732) INFO: 48epoch:train:4601-4700batch: iter_time=1.235e-04, forward_time=0.146, loss_ctc=58.768, loss_att=43.134, acc=0.731, loss=47.825, backward_time=1.028, grad_norm=109.747, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.187e-05, train_time=2.713 +[gpub002:0/64] 2023-07-14 08:36:47,789 (trainer:732) INFO: 48epoch:train:4701-4800batch: iter_time=1.187e-04, forward_time=0.146, loss_ctc=71.818, loss_att=54.359, acc=0.714, loss=59.597, backward_time=1.029, grad_norm=139.913, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.186e-05, train_time=2.717 +[gpub002:0/64] 2023-07-14 08:39:03,632 (trainer:732) INFO: 48epoch:train:4801-4900batch: iter_time=1.235e-04, forward_time=0.146, loss_ctc=69.204, loss_att=51.952, acc=0.719, loss=57.128, backward_time=1.029, grad_norm=128.452, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.186e-05, train_time=2.717 +[gpub002:0/64] 2023-07-14 08:41:19,530 (trainer:732) INFO: 48epoch:train:4901-5000batch: iter_time=1.175e-04, forward_time=0.146, loss_ctc=76.342, loss_att=54.027, acc=0.704, loss=60.721, backward_time=1.030, grad_norm=125.370, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.185e-05, train_time=2.718 +[gpub002:0/64] 2023-07-14 08:41:22,274 (multiple_iter_factory:32) INFO: Building 6th iter-factory... +[gpub002:0/64] 2023-07-14 08:41:40,677 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-14 08:41:44,120 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-14 08:41:44,120 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3, +[gpub002:0/64] 2023-07-14 08:41:44,127 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-14 08:47:46,295 (trainer:732) INFO: 48epoch:train:5001-5100batch: iter_time=1.324, forward_time=0.145, loss_ctc=64.448, loss_att=47.570, acc=0.724, loss=52.633, backward_time=1.044, grad_norm=141.410, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.185e-05, train_time=7.735 +[gpub002:0/64] 2023-07-14 08:50:02,881 (trainer:732) INFO: 48epoch:train:5101-5200batch: iter_time=8.791e-05, forward_time=0.143, loss_ctc=76.350, loss_att=54.968, acc=0.718, loss=61.383, backward_time=1.029, grad_norm=120.961, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.184e-05, train_time=2.731 +[gpub002:0/64] 2023-07-14 08:52:20,062 (trainer:732) INFO: 48epoch:train:5201-5300batch: iter_time=8.431e-05, forward_time=0.145, loss_ctc=78.698, loss_att=54.190, acc=0.731, loss=61.542, backward_time=1.034, grad_norm=145.742, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.184e-05, train_time=2.743 +[gpub002:0/64] 2023-07-14 08:54:36,566 (trainer:732) INFO: 
48epoch:train:5301-5400batch: iter_time=8.884e-05, forward_time=0.144, loss_ctc=71.014, loss_att=50.305, acc=0.733, loss=56.518, backward_time=1.031, grad_norm=149.754, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.183e-05, train_time=2.730 +[gpub002:0/64] 2023-07-14 08:56:54,010 (trainer:732) INFO: 48epoch:train:5401-5500batch: iter_time=1.200e-04, forward_time=0.145, loss_ctc=66.905, loss_att=50.167, acc=0.717, loss=55.189, backward_time=1.029, grad_norm=107.271, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.182e-05, train_time=2.749 +[gpub002:0/64] 2023-07-14 08:59:13,151 (trainer:732) INFO: 48epoch:train:5501-5600batch: iter_time=1.282e-04, forward_time=0.145, loss_ctc=62.122, loss_att=44.641, acc=0.726, loss=49.885, backward_time=1.028, grad_norm=133.648, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.182e-05, train_time=2.783 +[gpub002:0/64] 2023-07-14 09:01:33,741 (trainer:732) INFO: 48epoch:train:5601-5700batch: iter_time=1.289e-04, forward_time=0.146, loss_ctc=69.735, loss_att=53.019, acc=0.721, loss=58.034, backward_time=1.038, grad_norm=127.138, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.181e-05, train_time=2.812 +[gpub002:0/64] 2023-07-14 09:03:52,868 (trainer:732) INFO: 48epoch:train:5701-5800batch: iter_time=1.326e-04, forward_time=0.146, loss_ctc=69.218, loss_att=50.409, acc=0.725, loss=56.052, backward_time=1.035, grad_norm=115.986, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.181e-05, train_time=2.782 +[gpub002:0/64] 2023-07-14 09:04:41,018 (multiple_iter_factory:32) INFO: Building 7th iter-factory... +[gpub002:0/64] 2023-07-14 09:04:59,726 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-14 09:05:03,191 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-14 09:05:03,191 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0, +[gpub002:0/64] 2023-07-14 09:05:03,197 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-14 09:11:40,827 (trainer:732) INFO: 48epoch:train:5801-5900batch: iter_time=3.228, forward_time=0.187, loss_ctc=64.451, loss_att=51.138, acc=0.698, loss=55.132, backward_time=1.041, grad_norm=126.975, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.180e-05, train_time=9.358 +[gpub002:0/64] 2023-07-14 09:13:57,967 (trainer:732) INFO: 48epoch:train:5901-6000batch: iter_time=1.175e-04, forward_time=0.144, loss_ctc=74.538, loss_att=51.602, acc=0.714, loss=58.483, backward_time=1.028, grad_norm=138.164, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.186, optim0_lr0=5.180e-05, train_time=2.743 +[gpub002:0/64] 2023-07-14 09:16:16,645 (trainer:732) INFO: 48epoch:train:6001-6100batch: iter_time=7.038e-04, forward_time=0.146, loss_ctc=78.354, loss_att=60.750, acc=0.709, loss=66.031, backward_time=1.033, grad_norm=141.686, clip=100.000, 
loss_scale=1.623e+32, optim_step_time=0.183, optim0_lr0=5.179e-05, train_time=2.773 +[gpub002:0/64] 2023-07-14 09:18:33,621 (trainer:732) INFO: 48epoch:train:6101-6200batch: iter_time=1.237e-04, forward_time=0.146, loss_ctc=69.005, loss_att=49.078, acc=0.732, loss=55.056, backward_time=1.031, grad_norm=107.431, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.188, optim0_lr0=5.179e-05, train_time=2.739 +[gpub002:0/64] 2023-07-14 09:20:49,845 (trainer:732) INFO: 48epoch:train:6201-6300batch: iter_time=1.292e-04, forward_time=0.145, loss_ctc=71.735, loss_att=50.224, acc=0.706, loss=56.677, backward_time=1.031, grad_norm=145.134, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.186, optim0_lr0=5.178e-05, train_time=2.724 +[gpub002:0/64] 2023-07-14 09:23:24,114 (trainer:732) INFO: 48epoch:train:6301-6400batch: iter_time=1.285e-04, forward_time=0.283, loss_ctc=66.846, loss_att=50.323, acc=0.714, loss=55.280, backward_time=1.045, grad_norm=114.520, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.189, optim0_lr0=5.177e-05, train_time=3.085 +[gpub002:0/64] 2023-07-14 09:25:40,456 (trainer:732) INFO: 48epoch:train:6401-6500batch: iter_time=1.196e-04, forward_time=0.145, loss_ctc=72.179, loss_att=54.681, acc=0.704, loss=59.930, backward_time=1.029, grad_norm=148.277, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.177e-05, train_time=2.727 +[gpub002:0/64] 2023-07-14 09:28:20,556 (trainer:732) INFO: 48epoch:train:6501-6600batch: iter_time=1.396e-04, forward_time=0.146, loss_ctc=67.464, loss_att=48.020, acc=0.724, loss=53.853, backward_time=1.113, grad_norm=125.636, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.176e-05, train_time=3.202 +[gpub002:0/64] 2023-07-14 09:30:11,734 (multiple_iter_factory:32) INFO: Building 8th iter-factory... 
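Each "Building Nth iter-factory..." line marks a switch to another of the 12 pre-split shards (split.0 through split.11), visited in an order that changes per epoch; the iter_time spike (roughly 1-3 s versus ~1e-4 s) on the first 100-batch window after every rebuild is the fresh loader warming up. A rough sketch of that epoch structure, with build_loader as a placeholder for the factory that opens wav.scp/split.N and the matching text files:

import random

def run_epoch(build_loader, n_splits=12, epoch=48):
    order = list(range(n_splits))
    random.Random(epoch).shuffle(order)  # shard order varies per epoch
    for k, split in enumerate(order):
        # corresponds to "Building {k}th iter-factory..." in the log
        loader = build_loader(split)
        for batch in loader:             # the first windows absorb loader
            yield batch                  # start-up, hence the iter_time spikes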
+[gpub002:0/64] 2023-07-14 09:30:29,893 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-14 09:30:33,364 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-14 09:30:33,364 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7, +[gpub002:0/64] 2023-07-14 09:30:33,370 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-14 09:34:29,820 (trainer:732) INFO: 48epoch:train:6601-6700batch: iter_time=2.210, forward_time=0.145, loss_ctc=65.650, loss_att=48.209, acc=0.712, loss=53.441, backward_time=1.036, grad_norm=123.400, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.176e-05, train_time=7.385 +[gpub002:0/64] 2023-07-14 09:36:46,637 (trainer:732) INFO: 48epoch:train:6701-6800batch: iter_time=1.166e-04, forward_time=0.146, loss_ctc=68.915, loss_att=49.606, acc=0.726, loss=55.399, backward_time=1.031, grad_norm=124.451, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.175e-05, train_time=2.736 +[gpub002:0/64] 2023-07-14 09:39:03,225 (trainer:732) INFO: 48epoch:train:6801-6900batch: iter_time=1.266e-04, forward_time=0.146, loss_ctc=79.366, loss_att=60.971, acc=0.714, loss=66.490, backward_time=1.031, grad_norm=177.915, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.175e-05, train_time=2.732 +[gpub002:0/64] 2023-07-14 09:41:26,301 (trainer:732) INFO: 48epoch:train:6901-7000batch: iter_time=1.171e-04, forward_time=0.145, loss_ctc=71.703, loss_att=47.655, acc=0.738, loss=54.869, backward_time=1.033, grad_norm=118.979, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.185, optim0_lr0=5.174e-05, train_time=2.861 +[gpub002:0/64] 2023-07-14 09:43:47,692 (trainer:732) INFO: 48epoch:train:7001-7100batch: iter_time=1.210e-04, forward_time=0.145, loss_ctc=74.135, loss_att=51.319, acc=0.722, loss=58.164, backward_time=1.037, grad_norm=130.583, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.174e-05, train_time=2.828 +[gpub002:0/64] 2023-07-14 09:46:22,408 (trainer:732) INFO: 48epoch:train:7101-7200batch: iter_time=1.192e-04, forward_time=0.145, loss_ctc=61.004, loss_att=45.940, acc=0.727, loss=50.459, backward_time=1.063, grad_norm=116.241, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.173e-05, train_time=3.094 +[gpub002:0/64] 2023-07-14 09:48:38,387 (trainer:732) INFO: 48epoch:train:7201-7300batch: iter_time=1.124e-04, forward_time=0.146, loss_ctc=71.858, loss_att=52.784, acc=0.722, loss=58.507, backward_time=1.030, grad_norm=147.551, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.183, optim0_lr0=5.172e-05, train_time=2.720 +[gpub002:0/64] 2023-07-14 09:51:09,299 (trainer:732) INFO: 48epoch:train:7301-7400batch: iter_time=1.101e-04, forward_time=0.147, loss_ctc=69.932, loss_att=48.151, acc=0.732, loss=54.685, backward_time=1.064, grad_norm=130.713, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.183, 
optim0_lr0=5.172e-05, train_time=3.018 +[gpub002:0/64] 2023-07-14 09:53:26,312 (trainer:732) INFO: 48epoch:train:7401-7500batch: iter_time=1.054e-04, forward_time=0.146, loss_ctc=67.229, loss_att=50.273, acc=0.714, loss=55.360, backward_time=1.031, grad_norm=147.697, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.171e-05, train_time=2.740 +[gpub002:0/64] 2023-07-14 09:53:28,551 (multiple_iter_factory:32) INFO: Building 9th iter-factory... +[gpub002:0/64] 2023-07-14 09:53:46,672 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-14 09:53:50,086 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-14 09:53:50,086 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4, +[gpub002:0/64] 2023-07-14 09:53:50,092 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-14 10:00:46,448 (trainer:732) INFO: 48epoch:train:7501-7600batch: iter_time=1.330, forward_time=0.146, loss_ctc=64.829, loss_att=48.046, acc=0.710, loss=53.081, backward_time=1.043, grad_norm=121.945, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.171e-05, train_time=8.803 +[gpub002:0/64] 2023-07-14 10:03:03,225 (trainer:732) INFO: 48epoch:train:7601-7700batch: iter_time=1.167e-04, forward_time=0.145, loss_ctc=73.763, loss_att=54.433, acc=0.716, loss=60.232, backward_time=1.031, grad_norm=121.388, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.170e-05, train_time=2.735 +[gpub002:0/64] 2023-07-14 10:05:19,335 (trainer:732) INFO: 48epoch:train:7701-7800batch: iter_time=1.329e-04, forward_time=0.145, loss_ctc=77.979, loss_att=56.628, acc=0.718, loss=63.034, backward_time=1.030, grad_norm=145.939, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.170e-05, train_time=2.722 +[gpub002:0/64] 2023-07-14 10:07:35,239 (trainer:732) INFO: 48epoch:train:7801-7900batch: iter_time=1.459e-04, forward_time=0.144, loss_ctc=70.685, loss_att=50.240, acc=0.729, loss=56.374, backward_time=1.030, grad_norm=121.249, clip=100.000, loss_scale=2.564e+32, optim_step_time=0.182, optim0_lr0=5.169e-05, train_time=2.718 +[gpub002:0/64] 2023-07-14 10:10:05,010 (trainer:732) INFO: 48epoch:train:7901-8000batch: iter_time=1.328e-04, forward_time=0.239, loss_ctc=67.160, loss_att=50.782, acc=0.700, loss=55.695, backward_time=1.051, grad_norm=128.964, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.188, optim0_lr0=5.169e-05, train_time=2.995 +[gpub002:0/64] 2023-07-14 10:12:20,278 (trainer:732) INFO: 48epoch:train:8001-8100batch: iter_time=1.339e-04, forward_time=0.144, loss_ctc=63.723, loss_att=47.622, acc=0.712, loss=52.453, backward_time=1.025, grad_norm=112.150, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.168e-05, train_time=2.705 +[gpub002:0/64] 2023-07-14 10:14:36,085 (trainer:732) INFO: 48epoch:train:8101-8200batch: iter_time=1.193e-04, forward_time=0.144, loss_ctc=74.529, 
loss_att=54.062, acc=0.708, loss=60.202, backward_time=1.027, grad_norm=108.241, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.167e-05, train_time=2.716
+[gpub002:0/64] 2023-07-14 10:16:52,011 (trainer:732) INFO: 48epoch:train:8201-8300batch: iter_time=1.164e-04, forward_time=0.145, loss_ctc=68.909, loss_att=49.305, acc=0.729, loss=55.186, backward_time=1.028, grad_norm=144.843, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.167e-05, train_time=2.718
+[gpub002:0/64] 2023-07-14 10:17:38,915 (multiple_iter_factory:32) INFO: Building 10th iter-factory...
+[gpub002:0/64] 2023-07-14 10:17:57,252 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub002:0/64] 2023-07-14 10:18:00,656 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"}
+  preprocess: )
+[gpub002:0/64] 2023-07-14 10:18:00,656 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8,
+[gpub002:0/64] 2023-07-14 10:18:00,663 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub002:0/64] 2023-07-14 10:22:44,622 (trainer:732) INFO: 48epoch:train:8301-8400batch: iter_time=2.087, forward_time=0.144, loss_ctc=66.298, loss_att=48.900, acc=0.708, loss=54.119, backward_time=1.041, grad_norm=129.888, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.166e-05, train_time=7.052
+[gpub002:0/64] 2023-07-14 10:25:04,181 (trainer:732) INFO: 48epoch:train:8401-8500batch: iter_time=1.242e-04, forward_time=0.146, loss_ctc=79.695, loss_att=59.887, acc=0.715, loss=65.829, backward_time=1.030, grad_norm=136.298, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.166e-05, train_time=2.791
+[gpub002:0/64] 2023-07-14 10:27:20,855 (trainer:732) INFO: 48epoch:train:8501-8600batch: iter_time=1.298e-04, forward_time=0.145, loss_ctc=73.605, loss_att=52.399, acc=0.724, loss=58.761, backward_time=1.029, grad_norm=129.008, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.165e-05, train_time=2.733
+[gpub002:0/64] 2023-07-14 10:29:36,805 (trainer:732) INFO: 48epoch:train:8601-8700batch: iter_time=1.263e-04, forward_time=0.145, loss_ctc=73.931, loss_att=52.596, acc=0.720, loss=58.997, backward_time=1.027, grad_norm=123.600, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.165e-05, train_time=2.719
+[gpub002:0/64] 2023-07-14 10:31:52,421 (trainer:732) INFO: 48epoch:train:8701-8800batch: iter_time=1.245e-04, forward_time=0.145, loss_ctc=62.565, loss_att=48.800, acc=0.699, loss=52.930, backward_time=1.025, grad_norm=111.238, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.164e-05, train_time=2.712
+[gpub002:0/64] 2023-07-14 10:34:08,232 (trainer:732) INFO: 48epoch:train:8801-8900batch: iter_time=1.307e-04, forward_time=0.145, loss_ctc=71.599, loss_att=54.096, acc=0.705, loss=59.347, backward_time=1.027, grad_norm=114.515, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.164e-05, train_time=2.716
+[gpub002:0/64] 2023-07-14 10:36:23,781 (trainer:732) INFO: 48epoch:train:8901-9000batch: iter_time=1.220e-04, forward_time=0.145, loss_ctc=67.382, loss_att=47.060, acc=0.724, loss=53.156, backward_time=1.026, grad_norm=109.344, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.163e-05, train_time=2.711
+[gpub002:0/64] 2023-07-14 10:38:39,320 (trainer:732) INFO: 48epoch:train:9001-9100batch: iter_time=1.261e-04, forward_time=0.144, loss_ctc=65.844, loss_att=47.748, acc=0.721, loss=53.177, backward_time=1.027, grad_norm=117.156, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.163e-05, train_time=2.711
+[gpub002:0/64] 2023-07-14 10:40:20,761 (multiple_iter_factory:32) INFO: Building 11th iter-factory...
+[gpub002:0/64] 2023-07-14 10:40:38,784 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub002:0/64] 2023-07-14 10:40:42,509 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"}
+  preprocess: )
+[gpub002:0/64] 2023-07-14 10:40:42,509 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1,
+[gpub002:0/64] 2023-07-14 10:40:42,516 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub002:0/64] 2023-07-14 10:44:51,880 (trainer:732) INFO: 48epoch:train:9101-9200batch: iter_time=2.166, forward_time=0.173, loss_ctc=76.442, loss_att=55.319, acc=0.703, loss=61.656, backward_time=1.038, grad_norm=122.819, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.162e-05, train_time=7.451
+[gpub002:0/64] 2023-07-14 10:47:08,550 (trainer:732) INFO: 48epoch:train:9201-9300batch: iter_time=1.199e-04, forward_time=0.144, loss_ctc=68.780, loss_att=49.932, acc=0.724, loss=55.586, backward_time=1.031, grad_norm=107.542, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.161e-05, train_time=2.733
+[gpub002:0/64] 2023-07-14 10:49:26,299 (trainer:732) INFO: 48epoch:train:9301-9400batch: iter_time=1.263e-04, forward_time=0.146, loss_ctc=79.867, loss_att=61.875, acc=0.714, loss=67.273, backward_time=1.031, grad_norm=156.270, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.161e-05, train_time=2.755
+[gpub002:0/64] 2023-07-14 10:51:42,933 (trainer:732) INFO: 48epoch:train:9401-9500batch: iter_time=1.196e-04, forward_time=0.145, loss_ctc=72.470, loss_att=48.680, acc=0.737, loss=55.817, backward_time=1.029, grad_norm=122.617, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.160e-05, train_time=2.732
+[gpub002:0/64] 2023-07-14 10:53:58,937 (trainer:732) INFO: 48epoch:train:9501-9600batch: iter_time=1.210e-04, forward_time=0.146, loss_ctc=70.022, loss_att=50.699, acc=0.724, loss=56.496, backward_time=1.029, grad_norm=132.905, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.160e-05, train_time=2.720
+[gpub002:0/64] 2023-07-14 10:56:14,682 (trainer:732) INFO: 48epoch:train:9601-9700batch: iter_time=1.227e-04, forward_time=0.145, loss_ctc=60.015, loss_att=45.859, acc=0.727, loss=50.106, backward_time=1.028, grad_norm=143.949, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.159e-05, train_time=2.715
+[gpub002:0/64] 2023-07-14 10:58:33,862 (trainer:732) INFO: 48epoch:train:9701-9800batch: iter_time=1.277e-04, forward_time=0.146, loss_ctc=70.164, loss_att=53.081, acc=0.720, loss=58.206, backward_time=1.036, grad_norm=114.650, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.159e-05, train_time=2.783
+[gpub002:0/64] 2023-07-14 11:00:52,286 (trainer:732) INFO: 48epoch:train:9801-9900batch: iter_time=1.221e-04, forward_time=0.147, loss_ctc=67.592, loss_att=47.935, acc=0.733, loss=53.832, backward_time=1.034, grad_norm=113.911, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.158e-05, train_time=2.768
+[gpub002:0/64] 2023-07-14 11:03:07,869 (trainer:732) INFO: 48epoch:train:9901-10000batch: iter_time=1.237e-04, forward_time=0.145, loss_ctc=66.172, loss_att=49.889, acc=0.716, loss=54.774, backward_time=1.028, grad_norm=112.218, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.158e-05, train_time=2.711
+[gpub002:0/64] 2023-07-14 11:17:21,394 (trainer:338) INFO: 48epoch results: [train] iter_time=0.297, forward_time=0.153, loss_ctc=70.855, loss_att=51.822, acc=0.716, loss=57.532, backward_time=1.033, grad_norm=130.378, clip=100.000, loss_scale=2.805e+32, optim_step_time=0.183, optim0_lr0=5.185e-05, train_time=3.465, time=4 hours, 49 minutes and 0.35 seconds, total_count=450000, gpu_max_cached_mem_GB=37.574, [valid] loss_ctc=42.674, cer_ctc=0.250, loss_att=37.042, acc=0.682, cer=0.410, wer=0.996, loss=38.731, time=7 minutes and 58.92 seconds, total_count=46046, gpu_max_cached_mem_GB=37.574, [att_plot] time=6 minutes and 0.19 seconds, total_count=0, gpu_max_cached_mem_GB=37.574
+[gpub002:0/64] 2023-07-14 11:17:37,205 (trainer:386) INFO: The best model has been updated: valid.total_count
+[gpub002:0/64] 2023-07-14 11:17:37,216 (trainer:272) INFO: 49/50epoch started. Estimated time to finish: 10 hours, 12 minutes and 51.12 seconds
+[gpub002:0/64] 2023-07-14 11:17:37,220 (multiple_iter_factory:32) INFO: Building 0th iter-factory...
+[gpub002:0/64] 2023-07-14 11:17:55,035 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub002:0/64] 2023-07-14 11:17:58,436 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"}
+  preprocess: )
+[gpub002:0/64] 2023-07-14 11:17:58,436 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6,
+[gpub002:0/64] 2023-07-14 11:17:58,442 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub002:0/64] 2023-07-14 11:23:36,393 (trainer:732) INFO: 49epoch:train:1-100batch: iter_time=1.992, forward_time=0.180, loss_ctc=75.424, loss_att=56.205, acc=0.707, loss=61.970, backward_time=1.067, grad_norm=126.161, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.186, optim0_lr0=5.157e-05, train_time=7.183
+[gpub002:0/64] 2023-07-14 11:25:52,971 (trainer:732) INFO: 49epoch:train:101-200batch: iter_time=1.338e-04, forward_time=0.145, loss_ctc=78.109, loss_att=58.369, acc=0.696, loss=64.291, backward_time=1.030, grad_norm=156.563, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.156e-05, train_time=2.732
+[gpub002:0/64] 2023-07-14 11:28:10,045 (trainer:732) INFO: 49epoch:train:201-300batch: iter_time=1.411e-04, forward_time=0.145, loss_ctc=71.274, loss_att=53.833, acc=0.706, loss=59.065, backward_time=1.031, grad_norm=117.395, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.156e-05, train_time=2.741
+[gpub002:0/64] 2023-07-14 11:30:33,978 (trainer:732) INFO: 49epoch:train:301-400batch: iter_time=1.382e-04, forward_time=0.143, loss_ctc=82.935, loss_att=67.130, acc=0.686, loss=71.872, backward_time=1.041, grad_norm=143.181, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.155e-05, train_time=2.878
+[gpub002:0/64] 2023-07-14 11:33:11,725 (trainer:732) INFO: 49epoch:train:401-500batch: iter_time=1.370e-04, forward_time=0.144, loss_ctc=67.558, loss_att=49.800, acc=0.725, loss=55.128, backward_time=1.047, grad_norm=137.364, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.155e-05, train_time=3.155
+[gpub002:0/64] 2023-07-14 11:35:47,703 (trainer:732) INFO: 49epoch:train:501-600batch: iter_time=1.246e-04, forward_time=0.146, loss_ctc=67.201, loss_att=46.260, acc=0.720, loss=52.542, backward_time=1.045, grad_norm=114.336, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.154e-05, train_time=3.119
+[gpub002:0/64] 2023-07-14 11:38:18,577 (trainer:732) INFO: 49epoch:train:601-700batch: iter_time=1.325e-04, forward_time=0.150, loss_ctc=70.668, loss_att=51.546, acc=0.714, loss=57.283, backward_time=1.040, grad_norm=119.406, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.154e-05, train_time=3.017
+[gpub002:0/64] 2023-07-14 11:40:45,549 (trainer:732) INFO: 49epoch:train:701-800batch: iter_time=1.343e-04, forward_time=0.144, loss_ctc=61.423, loss_att=43.823, acc=0.717, loss=49.103, backward_time=1.037, grad_norm=107.937, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.153e-05, train_time=2.939
+[gpub002:0/64] 2023-07-14 11:41:43,214 (multiple_iter_factory:32) INFO: Building 1th iter-factory...
+[gpub002:0/64] 2023-07-14 11:42:01,107 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub002:0/64] 2023-07-14 11:42:04,459 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"}
+  preprocess: )
+[gpub002:0/64] 2023-07-14 11:42:04,459 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8,
+[gpub002:0/64] 2023-07-14 11:42:04,480 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub002:0/64] 2023-07-14 11:48:17,184 (trainer:732) INFO: 49epoch:train:801-900batch: iter_time=2.782, forward_time=0.203, loss_ctc=83.766, loss_att=63.502, acc=0.709, loss=69.582, backward_time=1.053, grad_norm=161.753, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.187, optim0_lr0=5.153e-05, train_time=9.032
+[gpub002:0/64] 2023-07-14 11:50:46,902 (trainer:732) INFO: 49epoch:train:901-1000batch: iter_time=9.525e-05, forward_time=0.144, loss_ctc=74.996, loss_att=53.416, acc=0.702, loss=59.890, backward_time=1.043, grad_norm=131.720, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.152e-05, train_time=2.994
+[gpub002:0/64] 2023-07-14 11:53:03,014 (trainer:732) INFO: 49epoch:train:1001-1100batch: iter_time=8.920e-05, forward_time=0.144, loss_ctc=71.753, loss_att=56.326, acc=0.702, loss=60.954, backward_time=1.032, grad_norm=130.090, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.152e-05, train_time=2.722
+[gpub002:0/64] 2023-07-14 11:55:18,875 (trainer:732) INFO: 49epoch:train:1101-1200batch: iter_time=1.102e-04, forward_time=0.143, loss_ctc=79.119, loss_att=61.363, acc=0.706, loss=66.690, backward_time=1.030, grad_norm=115.657, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.151e-05, train_time=2.717
+[gpub002:0/64] 2023-07-14 11:57:34,399 (trainer:732) INFO: 49epoch:train:1201-1300batch: iter_time=1.012e-04, forward_time=0.144, loss_ctc=72.908, loss_att=53.208, acc=0.712, loss=59.118, backward_time=1.028, grad_norm=115.476, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.150e-05, train_time=2.710
+[gpub002:0/64] 2023-07-14 11:59:49,721 (trainer:732) INFO: 49epoch:train:1301-1400batch: iter_time=1.097e-04, forward_time=0.142, loss_ctc=61.698, loss_att=43.601, acc=0.720, loss=49.030, backward_time=1.027, grad_norm=118.028, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.150e-05, train_time=2.706
+[gpub002:0/64] 2023-07-14 12:02:24,426 (trainer:732) INFO: 49epoch:train:1401-1500batch: iter_time=0.010, forward_time=0.243, loss_ctc=69.799, loss_att=51.631, acc=0.718, loss=57.082, backward_time=1.055, grad_norm=137.188, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.201, optim0_lr0=5.149e-05, train_time=3.093
+[gpub002:0/64] 2023-07-14 12:04:40,243 (trainer:732) INFO: 49epoch:train:1501-1600batch: iter_time=1.270e-04, forward_time=0.145, loss_ctc=62.191, loss_att=43.167, acc=0.720, loss=48.874, backward_time=1.028, grad_norm=110.624, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.149e-05, train_time=2.717
+[gpub002:0/64] 2023-07-14 12:06:21,070 (multiple_iter_factory:32) INFO: Building 2th iter-factory...
+[gpub002:0/64] 2023-07-14 12:06:39,230 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub002:0/64] 2023-07-14 12:06:42,623 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"}
+  preprocess: )
+[gpub002:0/64] 2023-07-14 12:06:42,623 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5,
+[gpub002:0/64] 2023-07-14 12:06:42,629 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub002:0/64] 2023-07-14 12:10:28,549 (trainer:732) INFO: 49epoch:train:1601-1700batch: iter_time=1.995, forward_time=0.145, loss_ctc=86.888, loss_att=63.993, acc=0.702, loss=70.861, backward_time=1.042, grad_norm=144.375, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.148e-05, train_time=6.966
+[gpub002:0/64] 2023-07-14 12:12:46,571 (trainer:732) INFO: 49epoch:train:1701-1800batch: iter_time=1.248e-04, forward_time=0.146, loss_ctc=72.288, loss_att=55.796, acc=0.708, loss=60.743, backward_time=1.033, grad_norm=127.355, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.148e-05, train_time=2.760
+[gpub002:0/64] 2023-07-14 12:15:04,541 (trainer:732) INFO: 49epoch:train:1801-1900batch: iter_time=1.254e-04, forward_time=0.146, loss_ctc=72.461, loss_att=50.750, acc=0.721, loss=57.264, backward_time=1.031, grad_norm=124.107, clip=100.000, loss_scale=5.127e+32, optim_step_time=0.182, optim0_lr0=5.147e-05, train_time=2.759
+[gpub002:0/64] 2023-07-14 12:17:21,880 (trainer:732) INFO: 49epoch:train:1901-2000batch: iter_time=1.228e-04, forward_time=0.146, loss_ctc=77.931, loss_att=60.068, acc=0.711, loss=65.427, backward_time=1.031, grad_norm=134.506, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.147e-05, train_time=2.747
+[gpub002:0/64] 2023-07-14 12:19:44,196 (trainer:732) INFO: 49epoch:train:2001-2100batch: iter_time=1.327e-04, forward_time=0.145, loss_ctc=74.295, loss_att=58.950, acc=0.728, loss=63.554, backward_time=1.047, grad_norm=139.921, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.146e-05, train_time=2.846
+[gpub002:0/64] 2023-07-14 12:21:47,756 (trainer:663) WARNING: The grad norm is nan. Skipping updating the model.
+[gpub002:0/64] 2023-07-14 12:22:28,541 (trainer:732) INFO: 49epoch:train:2101-2200batch: iter_time=2.902e-04, forward_time=0.195, loss_ctc=66.147, loss_att=47.940, acc=0.731, loss=53.402, backward_time=1.122, grad_norm=127.949, clip=100.000, loss_scale=5.497e+32, optim_step_time=0.196, optim0_lr0=5.146e-05, train_time=3.286
+[gpub002:0/64] 2023-07-14 12:24:44,739 (trainer:732) INFO: 49epoch:train:2201-2300batch: iter_time=1.043e-04, forward_time=0.146, loss_ctc=63.495, loss_att=43.732, acc=0.732, loss=49.661, backward_time=1.028, grad_norm=107.717, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.145e-05, train_time=2.724
+[gpub002:0/64] 2023-07-14 12:27:00,588 (trainer:732) INFO: 49epoch:train:2301-2400batch: iter_time=1.223e-04, forward_time=0.146, loss_ctc=70.905, loss_att=53.186, acc=0.722, loss=58.502, backward_time=1.028, grad_norm=119.707, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.144e-05, train_time=2.717
+[gpub002:0/64] 2023-07-14 12:29:16,288 (trainer:732) INFO: 49epoch:train:2401-2500batch: iter_time=1.233e-04, forward_time=0.146, loss_ctc=71.254, loss_att=49.884, acc=0.724, loss=56.295, backward_time=1.028, grad_norm=137.105, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.144e-05, train_time=2.714
+[gpub002:0/64] 2023-07-14 12:29:36,316 (multiple_iter_factory:32) INFO: Building 3th iter-factory...
+[gpub002:0/64] 2023-07-14 12:29:55,262 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub002:0/64] 2023-07-14 12:29:58,726 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"}
+  preprocess: )
+[gpub002:0/64] 2023-07-14 12:29:58,726 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7,
+[gpub002:0/64] 2023-07-14 12:29:58,784 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub002:0/64] 2023-07-14 12:36:52,242 (trainer:732) INFO: 49epoch:train:2501-2600batch: iter_time=3.018, forward_time=0.183, loss_ctc=77.934, loss_att=56.931, acc=0.709, loss=63.232, backward_time=1.042, grad_norm=160.805, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.184, optim0_lr0=5.143e-05, train_time=9.116
+[gpub002:0/64] 2023-07-14 12:39:09,252 (trainer:732) INFO: 49epoch:train:2601-2700batch: iter_time=1.261e-04, forward_time=0.144, loss_ctc=76.698, loss_att=56.198, acc=0.715, loss=62.348, backward_time=1.031, grad_norm=159.881, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.143e-05, train_time=2.743
+[gpub002:0/64] 2023-07-14 12:41:25,308 (trainer:732) INFO: 49epoch:train:2701-2800batch: iter_time=1.213e-04, forward_time=0.145, loss_ctc=70.984, loss_att=50.376, acc=0.725, loss=56.558, backward_time=1.031, grad_norm=114.170, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.142e-05, train_time=2.721
+[gpub002:0/64] 2023-07-14 12:43:41,674 (trainer:732) INFO: 49epoch:train:2801-2900batch: iter_time=1.222e-04, forward_time=0.145, loss_ctc=80.056, loss_att=64.744, acc=0.711, loss=69.337, backward_time=1.033, grad_norm=127.411, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.142e-05, train_time=2.727
+[gpub002:0/64] 2023-07-14 12:45:57,246 (trainer:732) INFO: 49epoch:train:2901-3000batch: iter_time=1.143e-04, forward_time=0.144, loss_ctc=67.599, loss_att=50.304, acc=0.735, loss=55.492, backward_time=1.027, grad_norm=117.099, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.141e-05, train_time=2.711
+[gpub002:0/64] 2023-07-14 12:48:13,149 (trainer:732) INFO: 49epoch:train:3001-3100batch: iter_time=1.120e-04, forward_time=0.146, loss_ctc=64.905, loss_att=44.347, acc=0.736, loss=50.514, backward_time=1.030, grad_norm=130.061, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.141e-05, train_time=2.718
+[gpub002:0/64] 2023-07-14 12:50:30,467 (trainer:732) INFO: 49epoch:train:3101-3200batch: iter_time=1.131e-04, forward_time=0.145, loss_ctc=68.429, loss_att=50.737, acc=0.731, loss=56.045, backward_time=1.028, grad_norm=111.368, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.140e-05, train_time=2.746
+[gpub002:0/64] 2023-07-14 12:52:46,238 (trainer:732) INFO: 49epoch:train:3201-3300batch: iter_time=1.227e-04, forward_time=0.144, loss_ctc=62.564, loss_att=44.638, acc=0.725, loss=50.016, backward_time=1.028, grad_norm=110.576, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.140e-05, train_time=2.715
+[gpub002:0/64] 2023-07-14 12:53:46,523 (multiple_iter_factory:32) INFO: Building 4th iter-factory...
+[gpub002:0/64] 2023-07-14 12:54:05,029 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub002:0/64] 2023-07-14 12:54:08,395 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"}
+  preprocess: )
+[gpub002:0/64] 2023-07-14 12:54:08,395 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4,
+[gpub002:0/64] 2023-07-14 12:54:08,401 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub002:0/64] 2023-07-14 12:59:16,516 (trainer:732) INFO: 49epoch:train:3301-3400batch: iter_time=1.682, forward_time=0.171, loss_ctc=82.313, loss_att=58.467, acc=0.718, loss=65.621, backward_time=1.042, grad_norm=192.097, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.139e-05, train_time=7.805
+[gpub002:0/64] 2023-07-14 13:02:14,955 (trainer:732) INFO: 49epoch:train:3401-3500batch: iter_time=1.222e-04, forward_time=0.145, loss_ctc=72.811, loss_att=56.064, acc=0.705, loss=61.088, backward_time=1.086, grad_norm=128.058, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.138e-05, train_time=3.569
+[gpub002:0/64] 2023-07-14 13:05:10,836 (trainer:732) INFO: 49epoch:train:3501-3600batch: iter_time=1.228e-04, forward_time=0.146, loss_ctc=70.741, loss_att=51.039, acc=0.720, loss=56.950, backward_time=1.079, grad_norm=144.537, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.138e-05, train_time=3.517
+[gpub002:0/64] 2023-07-14 13:07:59,359 (trainer:732) INFO: 49epoch:train:3601-3700batch: iter_time=1.242e-04, forward_time=0.144, loss_ctc=82.023, loss_att=65.980, acc=0.690, loss=70.793, backward_time=1.065, grad_norm=119.668, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.137e-05, train_time=3.370
+[gpub002:0/64] 2023-07-14 13:10:33,803 (trainer:732) INFO: 49epoch:train:3701-3800batch: iter_time=1.146e-04, forward_time=0.145, loss_ctc=68.382, loss_att=49.269, acc=0.731, loss=55.003, backward_time=1.040, grad_norm=119.342, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.137e-05, train_time=3.089
+[gpub002:0/64] 2023-07-14 13:13:52,522 (trainer:732) INFO: 49epoch:train:3801-3900batch: iter_time=1.390e-04, forward_time=0.146, loss_ctc=70.071, loss_att=52.760, acc=0.711, loss=57.953, backward_time=1.081, grad_norm=133.837, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.136e-05, train_time=3.974
+srun: Job step aborted: Waiting up to 32 seconds for job step to finish.
+slurmstepd: error: *** STEP 2147805.0 ON gpub002 CANCELLED AT 2023-07-14T13:15:07 DUE TO TIME LIMIT ***