diff --git "a/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/train.5.log" "b/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/train.5.log" new file mode 100644--- /dev/null +++ "b/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/train.5.log" @@ -0,0 +1,4446 @@ +# Running on gpub005.delta.ncsa.illinois.edu +# Started at Fri Jul 7 20:05:18 CDT 2023 +# SLURMD_NODENAME=gpub005 +# SLURM_CLUSTER_NAME=delta +# SLURM_CONF=/var/spool/slurmd/conf-cache/slurm.conf +# SLURM_CPUS_ON_NODE=64 +# SLURM_CPUS_PER_TASK=64 +# SLURM_EXPORT_ENV=PATH +# SLURM_GET_USER_ENV=1 +# SLURM_GPUS_ON_NODE=4 +# SLURM_GTIDS=0 +# SLURM_JOBID=2138608 +# SLURM_JOB_ACCOUNT=bbjs-delta-gpu +# SLURM_JOB_CPUS_PER_NODE='64(x16)' +# SLURM_JOB_GID=202 +# SLURM_JOB_GPUS=0,1,2,3 +# SLURM_JOB_ID=2138608 +# SLURM_JOB_NAME=exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/train.log +# SLURM_JOB_NODELIST='gpub[005,012-014,018,030,039-041,067,072,084,095-098]' +# SLURM_JOB_NUM_NODES=16 +# SLURM_JOB_PARTITION=gpuA40x4 +# SLURM_JOB_QOS=bbjs-delta-gpu +# SLURM_JOB_UID=68077 +# SLURM_JOB_USER=peng6 +# SLURM_LOCALID=0 +# SLURM_MEM_PER_NODE=240000 +# SLURM_NNODES=16 +# SLURM_NODEID=0 +# SLURM_NODELIST='gpub[005,012-014,018,030,039-041,067,072,084,095-098]' +# SLURM_NODE_ALIASES='(null)' +# SLURM_OPEN_MODE=a +# SLURM_PRIO_PROCESS=0 +# SLURM_PROCID=0 +# SLURM_SUBMIT_DIR=/scratch/bbjs/peng6/espnet-whisper-public/egs2/mixed_v3/s2t1 +# SLURM_SUBMIT_HOST=dt-login02.delta.internal.ncsa.edu +# SLURM_TASKS_PER_NODE='1(x16)' +# SLURM_TASK_PID=2408067 +# SLURM_TOPOLOGY_ADDR=ss00.ss09.gpub005 +# SLURM_TOPOLOGY_ADDR_PATTERN=switch.switch.node +# SLURM_WORKING_CLUSTER=delta:dt-sched:6817:9728:109 +# srun --export=ALL python3 -m espnet2.bin.s2t_train --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000 --config conf/train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/speech_shape --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type 
dump/raw/dev/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-whisper-public/egs2/mixed_v3/s2t1/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/.dist_init_7cf88da0-31a7-4a7a-b755-938512feff6b +/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/bin/python3 /scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000 --config conf/train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/speech_shape --fol/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/bin/python3 /scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000 --config conf/train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/speech_shape --fol/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/bin/python3 /scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000 --config conf/train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file 
exp/s2t_stats_raw_bpe50000/splits12/speech_shape --fol/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/bin/python3 /scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000 --config conf/train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/speech_shape --fol/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/bin/python3 /scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000 --config conf/train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/speech_shape --fol/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/bin/python3 /scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000 --config conf/train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/speech_shape --fol/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/bin/python3 /scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type 
dump/raw/dev/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000 --config conf/train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/speech_shape --fol/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/bin/python3 /scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000 --config conf/train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/speech_shape --fol/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/bin/python3 /scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000 --config conf/train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/speech_shape --fol/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/bin/python3 /scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000 --config conf/train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz 
--train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/speech_shape --fol/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/bin/python3 /scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000 --config conf/train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/speech_shape --fol/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/bin/python3 /scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000 --config conf/train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/speech_shape --fol/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/bin/python3 /scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000 --config conf/train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/speech_shape --fol/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/bin/python3 /scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt 
--non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000 --config conf/train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/speech_shape --fol/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/bin/python3 /scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000 --config conf/train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/speech_shape --fol/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/bin/python3 /scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000 --config conf/train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/speech_shape --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type 
dump/raw/dev/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-whisper-public/egs2/mixed_v3/s2t1/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/.dist_init_7cf88da0-31a7-4a7a-b755-938512feff6b +d_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-whisper-public/egs2/mixed_v3/s2t1/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/.dist_init_7cf88da0-31a7-4a7a-b755-938512feff6b +d_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distrd_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev/text.prev,text_prev,text --valid_shape_file 
exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-whisper-public/egs2/mixed_v3/s2t1/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/.dist_init_7cf88da0-31a7-4a7a-b755-938512feff6b +ibuted true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-whisper-public/egs2/mixed_v3/s2t1/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/.dist_init_7cf88da0-31a7-4a7a-b755-938512feff6b +d_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-whisper-public/egs2/mixed_v3/s2t1/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/.dist_init_7cf88da0-31a7-4a7a-b755-938512feff6b +d_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distrd_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file 
exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distrd_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distrd_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distrd_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text.ctc,text_ctc,text 
--valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-whisper-public/egs2/mixed_v3/s2t1/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/.dist_init_7cf88da0-31a7-4a7a-b755-938512feff6b +ibuted true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-whisper-public/egs2/mixed_v3/s2t1/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/.dist_init_7cf88da0-31a7-4a7a-b755-938512feff6b +d_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-whisper-public/egs2/mixed_v3/s2t1/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/.dist_init_7cf88da0-31a7-4a7a-b755-938512feff6b +ibuted true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-whisper-public/egs2/mixed_v3/s2t1/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/.dist_init_7cf88da0-31a7-4a7a-b755-938512feff6b +ibuted true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-whisper-public/egs2/mixed_v3/s2t1/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/.dist_init_7cf88da0-31a7-4a7a-b755-938512feff6b +ibuted true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-whisper-public/egs2/mixed_v3/s2t1/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/.dist_init_7cf88da0-31a7-4a7a-b755-938512feff6b +d_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev/text.prev,text_prev,text --valid_shape_file 
exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-whisper-public/egs2/mixed_v3/s2t1/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/.dist_init_7cf88da0-31a7-4a7a-b755-938512feff6b +d_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-whisper-public/egs2/mixed_v3/s2t1/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/.dist_init_7cf88da0-31a7-4a7a-b755-938512feff6b +d_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distrd_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type 
dump/raw/dev/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-whisper-public/egs2/mixed_v3/s2t1/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/.dist_init_7cf88da0-31a7-4a7a-b755-938512feff6b +ibuted true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-whisper-public/egs2/mixed_v3/s2t1/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/.dist_init_7cf88da0-31a7-4a7a-b755-938512feff6b +d_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-whisper-public/egs2/mixed_v3/s2t1/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/.dist_init_7cf88da0-31a7-4a7a-b755-938512feff6b +[gpub005:0/64] 2023-07-07 20:08:24,356 (distributed_c10d:319) INFO: Added key: store_based_barrier_key:1 to store for rank: 0 +[gpub005:0/64] 2023-07-07 20:08:25,263 (distributed_c10d:353) INFO: Rank 0: Completed store-based barrier for key:store_based_barrier_key:1 with 64 nodes. 
+[gpub005:0/64] 2023-07-07 20:08:25,288 (s2t:483) INFO: Vocabulary size: 50002
+[gpub005:0/64] 2023-07-07 20:08:40,904 (abs_task:1201) INFO: pytorch.version=1.13.1, cuda.available=True, cudnn.version=8500, cudnn.benchmark=False, cudnn.deterministic=True
+[gpub005:0/64] 2023-07-07 20:08:40,913 (abs_task:1202) INFO: Model structure:
+ESPnetS2TModel(
+  (frontend): DefaultFrontend(
+    (stft): Stft(n_fft=512, win_length=400, hop_length=160, center=True, normalized=False, onesided=True)
+    (frontend): Frontend()
+    (logmel): LogMel(sr=16000, n_fft=512, n_mels=80, fmin=0, fmax=8000.0, htk=False)
+  )
+  (specaug): SpecAug(
+    (freq_mask): MaskAlongAxis(mask_width_range=[0, 27], num_mask=2, axis=freq)
+    (time_mask): MaskAlongAxisVariableMaxWidth(mask_width_ratio_range=[0.0, 0.05], num_mask=10, axis=time)
+  )
+  (normalize): GlobalMVN(stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz, norm_means=True, norm_vars=True)
+  (encoder): TransformerEncoder(
+    (embed): Conv2dSubsampling(
+      (conv): Sequential(
+        (0): Conv2d(1, 1024, kernel_size=(3, 3), stride=(2, 2))
+        (1): ReLU()
+        (2): Conv2d(1024, 1024, kernel_size=(3, 3), stride=(2, 2))
+        (3): ReLU()
+      )
+      (out): Sequential(
+        (0): Linear(in_features=19456, out_features=1024, bias=True)
+        (1): PositionalEncoding(
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+      )
+    )
+    (encoders): MultiSequential(
+      (0): EncoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (1): EncoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (2): EncoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (3): EncoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (4): EncoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (5): EncoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (6): EncoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (7): EncoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (8): EncoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (9): EncoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (10): EncoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (11): EncoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (12): EncoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (13): EncoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (14): EncoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (15): EncoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (16): EncoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (17): EncoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (18): EncoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (19): EncoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (20): EncoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (21): EncoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (22): EncoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (23): EncoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+    )
+    (after_norm): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+  )
+  (decoder): TransformerDecoder(
+    (embed): Sequential(
+      (0): Embedding(50002, 1024)
+      (1): PositionalEncoding(
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+    )
+    (after_norm): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+    (output_layer): Linear(in_features=1024, out_features=50002, bias=True)
+    (decoders): MultiSequential(
+      (0): DecoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (src_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (1): DecoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (src_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (2): DecoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (src_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (3): DecoderLayer(
+        (self_attn): MultiHeadedAttention(
(linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (4): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (5): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), 
eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (6): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (7): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (8): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): 
LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (9): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (10): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (11): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + 
(w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (12): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (13): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (14): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): 
Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (15): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (16): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (17): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): 
Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (18): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (19): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (20): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): 
Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (21): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (22): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (23): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): 
Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (src_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+    )
+  )
+  (criterion_att): LabelSmoothingLoss(
+    (criterion): KLDivLoss()
+  )
+  (ctc): CTC(
+    (ctc_lo): Linear(in_features=1024, out_features=50002, bias=True)
+    (ctc_loss): CTCLoss()
+  )
+)
+
+Model summary:
+    Class Name: ESPnetS2TModel
+    Total Number of model parameters: 888.51 M
+    Number of trainable parameters: 888.51 M (100.0%)
+    Size: 3.55 GB
+    Type: torch.float32
+[gpub005:0/64] 2023-07-07 20:08:40,913 (abs_task:1205) INFO: Optimizer:
+AdamW (
+Parameter Group 0
+    amsgrad: False
+    betas: [0.9, 0.98]
+    capturable: False
+    eps: 1e-06
+    foreach: None
+    initial_lr: 0.00025
+    lr: 2.5e-08
+    maximize: False
+    weight_decay: 0.0
+)
+[gpub005:0/64] 2023-07-07 20:08:40,913 (abs_task:1206) INFO: Scheduler: WarmupLR(warmup_steps=10000)
+[gpub005:0/64] 2023-07-07 20:08:40,939 (abs_task:1215) INFO: Saving the configuration in exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/config.yaml
+[gpub005:0/64] 2023-07-07 20:08:41,640 (abs_task:1272) INFO: Loading pretrained params from /scratch/bbjs/peng6/espnet-whisper-public/egs2/mixed_v2/s2t1/exp/s2t_train_s2t_transformer_conv2d_size1024_e18_d18_lr5e-4_warmup20k_raw_bpe50000/valid.acc.ave.pth
+[gpub005:0/64] 2023-07-07 20:08:50,082 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub005:0/64] 2023-07-07 20:08:50,311 (abs_task:1570) INFO: [valid] dataset:
+ESPnetDataset(
+  speech: {"path": "dump/raw/dev/wav.scp", "type": "kaldi_ark"}
+  text_prev: {"path": "dump/raw/dev/text.prev", "type": "text"}
+  text_ctc: {"path": "dump/raw/dev/text.ctc", "type": "text"}
+  text: {"path": "dump/raw/dev/text", "type": "text"}
+  preprocess: )
+[gpub005:0/64] 2023-07-07 20:08:50,311 (abs_task:1571) INFO: [valid] Batch sampler: UnsortedBatchSampler(N-batch=1012, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/valid/speech_shape,
+[gpub005:0/64] 2023-07-07 20:08:50,314 (abs_task:1572) INFO: [valid] mini-batch sizes summary: N-batch=1012, mean=128.1, min=128, max=129
+[gpub005:0/64] 2023-07-07 20:08:50,794 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub005:0/64] 2023-07-07 20:08:51,112 (abs_task:1570) INFO: [plot_att] dataset:
+ESPnetDataset(
+  speech: {"path": "dump/raw/dev/wav.scp", "type": "kaldi_ark"}
+  text_prev: {"path": "dump/raw/dev/text.prev", "type": "text"}
+  text_ctc: {"path": "dump/raw/dev/text.ctc", "type": "text"}
+  text: {"path": "dump/raw/dev/text", "type": "text"}
+  preprocess: )
+[gpub005:0/64] 2023-07-07 20:08:51,112 (abs_task:1571) INFO: [plot_att] Batch sampler: UnsortedBatchSampler(N-batch=129591, batch_size=1, key_file=exp/s2t_stats_raw_bpe50000/valid/speech_shape,
+[gpub005:0/64] 2023-07-07 20:08:51,112 (abs_task:1572) INFO: [plot_att] mini-batch sizes summary: N-batch=3, mean=1.0, min=1, max=1
+[gpub005:0/64] 2023-07-07 20:09:20,037 (trainer:159) INFO: The training was resumed using exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/checkpoint.pth
+gpub005:2408151:2408151 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.105<0>
+gpub005:2408151:2408151 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub005:2408151:2408151 [0] NCCL INFO cudaDriverVersion 12010
+NCCL version 2.14.3+cuda11.7
+[gpub005:0/64] 2023-07-07 20:09:25,530 (trainer:284) INFO: 23/30epoch started
+[gpub005:0/64] 2023-07-07 20:09:25,594 (multiple_iter_factory:32) INFO: Building 0th iter-factory...
+[gpub005:0/64] 2023-07-07 20:09:43,306 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub005:0/64] 2023-07-07 20:09:46,752 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"}
+  preprocess: )
+[gpub005:0/64] 2023-07-07 20:09:46,752 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1,
+[gpub005:0/64] 2023-07-07 20:09:46,758 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+gpub005:2408154:2408154 [3] NCCL INFO cudaDriverVersion 12010
+gpub005:2408154:2408154 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.105<0>
+gpub005:2408154:2408154 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub005:2408154:2408227 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.105<0>
+gpub005:2408154:2408227 [3] NCCL INFO Using network IB
+gpub005:2408154:2408227 [3] NCCL INFO Setting affinity for GPU 3 to ffff
+gpub005:2408154:2408227 [3] NCCL INFO Trees [0] -1/-1/-1->3->2 [1] -1/-1/-1->3->2
+gpub005:2408154:2408227 [3] NCCL INFO Channel 00/0 : 3[c7000] -> 4[7000] [send] via NET/IB/0
+gpub005:2408154:2408227 [3] NCCL INFO Channel 01/0 : 3[c7000] -> 4[7000] [send] via NET/IB/0
+gpub005:2408154:2408227 [3] NCCL INFO Connected all rings
+gpub005:2408154:2408227 [3] NCCL INFO Channel 00/0 : 3[c7000] -> 2[85000] via P2P/IPC
+gpub005:2408154:2408227 [3] NCCL INFO Channel 01/0 : 3[c7000] -> 2[85000] via P2P/IPC
+gpub005:2408154:2408227 [3] NCCL INFO Connected all trees
+gpub005:2408154:2408227 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub005:2408154:2408227 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub005:2408154:2408227 [3] NCCL INFO comm 0x519a6580 rank 3 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE
+gpub005:2408151:2408226 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.105<0>
+gpub005:2408151:2408226 [0] NCCL INFO Using network IB
+gpub005:2408151:2408226 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000
+gpub005:2408151:2408226 [0] NCCL INFO Channel 00/02 : 0 1 2 3 4 5 6 7 8 9 10
11 12 13 14 15 16 17 18 19 +gpub005:2408151:2408226 [0] NCCL INFO Channel 01/02 : 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 +gpub005:2408151:2408226 [0] NCCL INFO Trees [0] 1/32/-1->0->-1 [1] 1/-1/-1->0->4 +gpub005:2408151:2408226 [0] NCCL INFO Channel 00/0 : 63[c7000] -> 0[7000] [receive] via NET/IB/0 +gpub005:2408151:2408226 [0] NCCL INFO Channel 01/0 : 63[c7000] -> 0[7000] [receive] via NET/IB/0 +gpub005:2408151:2408226 [0] NCCL INFO Channel 00/0 : 0[7000] -> 1[46000] via P2P/IPC +gpub005:2408151:2408226 [0] NCCL INFO Channel 01/0 : 0[7000] -> 1[46000] via P2P/IPC +gpub005:2408151:2408226 [0] NCCL INFO Connected all rings +gpub005:2408151:2408226 [0] NCCL INFO Channel 01/0 : 0[7000] -> 4[7000] [send] via NET/IB/0 +gpub005:2408151:2408226 [0] NCCL INFO Channel 00/0 : 32[7000] -> 0[7000] [receive] via NET/IB/0 +gpub005:2408151:2408226 [0] NCCL INFO Channel 00/0 : 0[7000] -> 32[7000] [send] via NET/IB/0 +gpub005:2408151:2408226 [0] NCCL INFO Channel 01/0 : 4[7000] -> 0[7000] [receive] via NET/IB/0 +gpub005:2408151:2408226 [0] NCCL INFO Connected all trees +gpub005:2408151:2408226 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub005:2408151:2408226 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub005:2408151:2408226 [0] NCCL INFO comm 0x8dda0850 rank 0 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpub005:2408152:2408152 [1] NCCL INFO cudaDriverVersion 12010 +gpub005:2408152:2408152 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.105<0> +gpub005:2408152:2408152 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub005:2408152:2408228 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.105<0> +gpub005:2408152:2408228 [1] NCCL INFO Using network IB +gpub005:2408152:2408228 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub005:2408152:2408228 [1] NCCL INFO Trees [0] 2/-1/-1->1->0 [1] 2/-1/-1->1->0 +gpub005:2408152:2408228 [1] NCCL INFO Channel 00/0 : 1[46000] -> 2[85000] via P2P/IPC +gpub005:2408152:2408228 [1] NCCL INFO Channel 01/0 : 1[46000] -> 2[85000] via P2P/IPC +gpub005:2408152:2408228 [1] NCCL INFO Connected all rings +gpub005:2408152:2408228 [1] NCCL INFO Channel 00/0 : 1[46000] -> 0[7000] via P2P/IPC +gpub005:2408152:2408228 [1] NCCL INFO Channel 01/0 : 1[46000] -> 0[7000] via P2P/IPC +gpub005:2408152:2408228 [1] NCCL INFO Connected all trees +gpub005:2408152:2408228 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub005:2408152:2408228 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub005:2408152:2408228 [1] NCCL INFO comm 0x50e7e140 rank 1 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpub067:1574054:1574054 [0] NCCL INFO cudaDriverVersion 12010 +gpub067:1574054:1574054 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.167<0> +gpub067:1574054:1574054 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub067:1574054:1574131 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.167<0> +gpub067:1574054:1574131 [0] NCCL INFO Using network IB +gpub067:1574054:1574131 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub067:1574054:1574131 [0] NCCL INFO Trees [0] 37/-1/-1->36->41 [1] 37/32/-1->36->44 +gpub067:1574054:1574131 [0] NCCL INFO Channel 00/0 : 35[c7000] -> 36[7000] [receive] via NET/IB/0 +gpub067:1574054:1574131 [0] NCCL INFO Channel 01/0 : 35[c7000] -> 36[7000] [receive] via NET/IB/0 +gpub067:1574054:1574131 [0] NCCL INFO Channel 00/0 : 
36[7000] -> 37[46000] via P2P/IPC +gpub067:1574054:1574131 [0] NCCL INFO Channel 01/0 : 36[7000] -> 37[46000] via P2P/IPC +gpub067:1574054:1574131 [0] NCCL INFO Connected all rings +gpub067:1574054:1574131 [0] NCCL INFO Channel 01/0 : 32[7000] -> 36[7000] [receive] via NET/IB/0 +gpub067:1574054:1574131 [0] NCCL INFO Channel 00/0 : 36[7000] -> 41[46000] [send] via NET/IB/0 +gpub067:1574054:1574131 [0] NCCL INFO Channel 01/0 : 36[7000] -> 44[7000] [send] via NET/IB/0 +gpub067:1574054:1574131 [0] NCCL INFO Channel 01/0 : 44[7000] -> 36[7000] [receive] via NET/IB/0 +gpub067:1574054:1574131 [0] NCCL INFO Channel 00/0 : 41[46000] -> 36[7000] [receive] via NET/IB/0 +gpub067:1574054:1574131 [0] NCCL INFO Channel 01/0 : 36[7000] -> 32[7000] [send] via NET/IB/0 +gpub067:1574054:1574131 [0] NCCL INFO Connected all trees +gpub067:1574054:1574131 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub067:1574054:1574131 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub067:1574054:1574131 [0] NCCL INFO comm 0x4f342150 rank 36 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpub095:2520061:2520061 [2] NCCL INFO cudaDriverVersion 12010 +gpub095:2520061:2520061 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.195<0> +gpub095:2520061:2520061 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub095:2520061:2520137 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.195<0> +gpub095:2520061:2520137 [2] NCCL INFO Using network IB +gpub095:2520061:2520137 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub095:2520061:2520137 [2] NCCL INFO Trees [0] 51/-1/-1->50->49 [1] 51/-1/-1->50->49 +gpub095:2520061:2520137 [2] NCCL INFO Channel 00/0 : 50[85000] -> 51[c7000] via P2P/IPC +gpub095:2520061:2520137 [2] NCCL INFO Channel 01/0 : 50[85000] -> 51[c7000] via P2P/IPC +gpub095:2520061:2520137 [2] NCCL INFO Connected all rings +gpub095:2520061:2520137 [2] NCCL INFO Channel 00/0 : 50[85000] -> 49[46000] via P2P/IPC +gpub095:2520061:2520137 [2] NCCL INFO Channel 01/0 : 50[85000] -> 49[46000] via P2P/IPC +gpub095:2520061:2520137 [2] NCCL INFO Connected all trees +gpub095:2520061:2520137 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub095:2520061:2520137 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub095:2520061:2520137 [2] NCCL INFO comm 0x91b7930 rank 50 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpub013:1694053:1694053 [0] NCCL INFO cudaDriverVersion 12010 +gpub013:1694053:1694053 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.113<0> +gpub013:1694053:1694053 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub013:1694053:1694130 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.113<0> +gpub013:1694053:1694130 [0] NCCL INFO Using network IB +gpub013:1694053:1694130 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub013:1694053:1694130 [0] NCCL INFO Trees [0] 9/12/-1->8->17 [1] 9/-1/-1->8->5 +gpub013:1694053:1694130 [0] NCCL INFO Channel 00/0 : 7[c7000] -> 8[7000] [receive] via NET/IB/0 +gpub013:1694053:1694130 [0] NCCL INFO Channel 01/0 : 7[c7000] -> 8[7000] [receive] via NET/IB/0 +gpub013:1694053:1694130 [0] NCCL INFO Channel 00/0 : 8[7000] -> 9[46000] via P2P/IPC +gpub013:1694053:1694130 [0] NCCL INFO Channel 01/0 : 8[7000] -> 9[46000] via P2P/IPC +gpub013:1694053:1694130 [0] NCCL INFO Connected all rings +gpub005:2408153:2408153 [2] NCCL INFO cudaDriverVersion 12010 
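
Note (not part of the training output): the optimizer dump above shows initial_lr: 0.00025 but lr: 2.5e-08, which is consistent with WarmupLR(warmup_steps=10000) having already applied the step-1 value of a Noam-style schedule. A minimal sketch, assuming ESPnet's standard WarmupLR formula:

# Noam-style warmup: lr(step) = base_lr * warmup**0.5 * min(step**-0.5, step * warmup**-1.5)
def warmup_lr(step: int, base_lr: float = 2.5e-4, warmup: int = 10000) -> float:
    return base_lr * warmup**0.5 * min(step**-0.5, step * warmup**-1.5)

print(warmup_lr(1))      # ~2.5e-08, the "lr" reported in the optimizer dump
print(warmup_lr(10000))  # 0.00025, the peak rate reached at the end of warmup
print(warmup_lr(40000))  # 0.000125, inverse-sqrt decay after warmup
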
+gpub005:2408153:2408153 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.105<0> +gpub005:2408153:2408153 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub005:2408153:2408229 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.105<0> +gpub005:2408153:2408229 [2] NCCL INFO Using network IB +gpub005:2408153:2408229 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub005:2408153:2408229 [2] NCCL INFO Trees [0] 3/-1/-1->2->1 [1] 3/-1/-1->2->1 +gpub005:2408153:2408229 [2] NCCL INFO Channel 00/0 : 2[85000] -> 3[c7000] via P2P/IPC +gpub005:2408153:2408229 [2] NCCL INFO Channel 01/0 : 2[85000] -> 3[c7000] via P2P/IPC +gpub005:2408153:2408229 [2] NCCL INFO Connected all rings +gpub005:2408153:2408229 [2] NCCL INFO Channel 00/0 : 2[85000] -> 1[46000] via P2P/IPC +gpub005:2408153:2408229 [2] NCCL INFO Channel 01/0 : 2[85000] -> 1[46000] via P2P/IPC +gpub013:1694053:1694130 [0] NCCL INFO Channel 01/0 : 5[46000] -> 8[7000] [receive] via NET/IB/0 +gpub013:1694053:1694130 [0] NCCL INFO Channel 00/0 : 8[7000] -> 12[7000] [send] via NET/IB/0 +gpub013:1694053:1694130 [0] NCCL INFO Channel 00/0 : 8[7000] -> 17[46000] [send] via NET/IB/0 +gpub013:1694053:1694130 [0] NCCL INFO Channel 00/0 : 17[46000] -> 8[7000] [receive] via NET/IB/0 +gpub013:1694053:1694130 [0] NCCL INFO Channel 00/0 : 12[7000] -> 8[7000] [receive] via NET/IB/0 +gpub013:1694053:1694130 [0] NCCL INFO Channel 01/0 : 8[7000] -> 5[46000] [send] via NET/IB/0 +gpub013:1694053:1694130 [0] NCCL INFO Connected all trees +gpub013:1694053:1694130 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub013:1694053:1694130 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub013:1694053:1694130 [0] NCCL INFO comm 0x8c6ae750 rank 8 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpub005:2408153:2408229 [2] NCCL INFO Connected all trees +gpub005:2408153:2408229 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub005:2408153:2408229 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub005:2408153:2408229 [2] NCCL INFO comm 0x4fab6870 rank 2 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpub013:1694054:1694054 [1] NCCL INFO cudaDriverVersion 12010 +gpub013:1694054:1694054 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.113<0> +gpub013:1694054:1694054 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub013:1694054:1694131 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.113<0> +gpub013:1694054:1694131 [1] NCCL INFO Using network IB +gpub013:1694054:1694131 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub013:1694054:1694131 [1] NCCL INFO Trees [0] 10/4/-1->9->8 [1] 10/-1/-1->9->8 +gpub013:1694054:1694131 [1] NCCL INFO Channel 00/0 : 9[46000] -> 10[85000] via P2P/IPC +gpub013:1694054:1694131 [1] NCCL INFO Channel 01/0 : 9[46000] -> 10[85000] via P2P/IPC +gpub013:1694054:1694131 [1] NCCL INFO Connected all rings +gpub013:1694054:1694131 [1] NCCL INFO Channel 00/0 : 4[7000] -> 9[46000] [receive] via NET/IB/0 +gpub013:1694054:1694131 [1] NCCL INFO Channel 00/0 : 9[46000] -> 4[7000] [send] via NET/IB/0 +gpub013:1694054:1694131 [1] NCCL INFO Channel 00/0 : 9[46000] -> 8[7000] via P2P/IPC +gpub013:1694054:1694131 [1] NCCL INFO Channel 01/0 : 9[46000] -> 8[7000] via P2P/IPC +gpub013:1694054:1694131 [1] NCCL INFO Connected all trees +gpub013:1694054:1694131 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub013:1694054:1694131 [1] NCCL 
INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub013:1694054:1694131 [1] NCCL INFO comm 0x5088d590 rank 9 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpub013:1694055:1694055 [2] NCCL INFO cudaDriverVersion 12010 +gpub013:1694055:1694055 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.113<0> +gpub013:1694055:1694055 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub013:1694055:1694128 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.113<0> +gpub013:1694055:1694128 [2] NCCL INFO Using network IB +gpub013:1694055:1694128 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub013:1694055:1694128 [2] NCCL INFO Trees [0] 11/-1/-1->10->9 [1] 11/-1/-1->10->9 +gpub013:1694055:1694128 [2] NCCL INFO Channel 00/0 : 10[85000] -> 11[c7000] via P2P/IPC +gpub013:1694055:1694128 [2] NCCL INFO Channel 01/0 : 10[85000] -> 11[c7000] via P2P/IPC +gpub013:1694055:1694128 [2] NCCL INFO Connected all rings +gpub013:1694055:1694128 [2] NCCL INFO Channel 00/0 : 10[85000] -> 9[46000] via P2P/IPC +gpub013:1694055:1694128 [2] NCCL INFO Channel 01/0 : 10[85000] -> 9[46000] via P2P/IPC +gpub013:1694055:1694128 [2] NCCL INFO Connected all trees +gpub013:1694055:1694128 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub013:1694055:1694128 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub013:1694055:1694128 [2] NCCL INFO comm 0xf6b9b10 rank 10 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpub067:1574055:1574055 [1] NCCL INFO cudaDriverVersion 12010 +gpub067:1574055:1574055 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.167<0> +gpub067:1574055:1574055 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub067:1574055:1574134 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.167<0> +gpub067:1574055:1574134 [1] NCCL INFO Using network IB +gpub067:1574055:1574134 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub067:1574055:1574134 [1] NCCL INFO Trees [0] 38/-1/-1->37->36 [1] 38/40/-1->37->36 +gpub067:1574055:1574134 [1] NCCL INFO Channel 00/0 : 37[46000] -> 38[85000] via P2P/IPC +gpub067:1574055:1574134 [1] NCCL INFO Channel 01/0 : 37[46000] -> 38[85000] via P2P/IPC +gpub067:1574055:1574134 [1] NCCL INFO Connected all rings +gpub067:1574055:1574134 [1] NCCL INFO Channel 01/0 : 37[46000] -> 40[7000] [send] via NET/IB/0 +gpub067:1574055:1574134 [1] NCCL INFO Channel 01/0 : 40[7000] -> 37[46000] [receive] via NET/IB/0 +gpub067:1574055:1574134 [1] NCCL INFO Channel 00/0 : 37[46000] -> 36[7000] via P2P/IPC +gpub067:1574055:1574134 [1] NCCL INFO Channel 01/0 : 37[46000] -> 36[7000] via P2P/IPC +gpub067:1574055:1574134 [1] NCCL INFO Connected all trees +gpub067:1574055:1574134 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub067:1574055:1574134 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub067:1574055:1574134 [1] NCCL INFO comm 0x509b90f0 rank 37 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpub095:2520062:2520062 [3] NCCL INFO cudaDriverVersion 12010 +gpub095:2520062:2520062 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.195<0> +gpub095:2520062:2520062 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub095:2520062:2520138 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.195<0> +gpub095:2520062:2520138 [3] NCCL INFO Using network IB +gpub095:2520062:2520138 [3] NCCL INFO Setting affinity for GPU 3 to ffff 
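
Note (not part of the training output): the "888.51 M" figure in the model summary can be roughly cross-checked from the layer shapes in the dump. The sketch below counts only the modules visible in this excerpt (24 encoder and 24 decoder layers, embedding, output layer, CTC head); the remaining ~29 M presumably sits in the conv2d subsampling frontend, which appears earlier in the dump and is an assumption here:

# Back-of-the-envelope parameter count from the logged shapes (illustrative).
d, ffn, vocab, layers = 1024, 4096, 50002, 24

attn = 4 * (d * d + d)                  # linear_q/k/v/out, each with bias
ff = (d * ffn + ffn) + (ffn * d + d)    # w_1 and w_2
ln = 2 * d                              # LayerNorm weight + bias

enc_layer = attn + ff + 2 * ln          # self_attn, feed_forward, norm1/2
dec_layer = 2 * attn + ff + 3 * ln      # adds src_attn and norm3

total = (
    layers * enc_layer + ln             # encoder stack + after_norm
    + layers * dec_layer + ln           # decoder stack + after_norm
    + vocab * d                         # decoder embedding (50002, 1024)
    + (d * vocab + vocab)               # decoder output_layer
    + (d * vocab + vocab)               # ctc_lo
)
print(f"{total / 1e6:.2f} M")  # ~859.14 M of the logged 888.51 M total
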
+gpub095:2520062:2520138 [3] NCCL INFO Trees [0] -1/-1/-1->51->50 [1] -1/-1/-1->51->50
+gpub095:2520062:2520138 [3] NCCL INFO Channel 00/0 : 51[c7000] -> 52[7000] [send] via NET/IB/0
+gpub095:2520062:2520138 [3] NCCL INFO Channel 01/0 : 51[c7000] -> 52[7000] [send] via NET/IB/0
+gpub095:2520062:2520138 [3] NCCL INFO Connected all rings
+gpub095:2520062:2520138 [3] NCCL INFO Channel 00/0 : 51[c7000] -> 50[85000] via P2P/IPC
+gpub095:2520062:2520138 [3] NCCL INFO Channel 01/0 : 51[c7000] -> 50[85000] via P2P/IPC
+gpub095:2520062:2520138 [3] NCCL INFO Connected all trees
+gpub095:2520062:2520138 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub095:2520062:2520138 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub095:2520062:2520138 [3] NCCL INFO comm 0x8c7104c0 rank 51 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE
+gpub095:2520059:2520059 [0] NCCL INFO cudaDriverVersion 12010
+gpub095:2520059:2520059 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.195<0>
+gpub095:2520059:2520059 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub095:2520059:2520136 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.195<0>
+gpub095:2520059:2520136 [0] NCCL INFO Using network IB
+gpub095:2520059:2520136 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000
+gpub095:2520059:2520136 [0] NCCL INFO Trees [0] 49/56/-1->48->32 [1] 49/-1/-1->48->52
+gpub095:2520059:2520136 [0] NCCL INFO Channel 00/0 : 47[c7000] -> 48[7000] [receive] via NET/IB/0
+gpub095:2520059:2520136 [0] NCCL INFO Channel 01/0 : 47[c7000] -> 48[7000] [receive] via NET/IB/0
+gpub095:2520059:2520136 [0] NCCL INFO Channel 00/0 : 48[7000] -> 49[46000] via P2P/IPC
+gpub095:2520059:2520136 [0] NCCL INFO Channel 01/0 : 48[7000] -> 49[46000] via P2P/IPC
+gpub095:2520059:2520136 [0] NCCL INFO Connected all rings
+gpub095:2520059:2520136 [0] NCCL INFO Channel 01/0 : 48[7000] -> 52[7000] [send] via NET/IB/0
+gpub095:2520059:2520136 [0] NCCL INFO Channel 00/0 : 48[7000] -> 56[7000] [send] via NET/IB/0
+gpub095:2520059:2520136 [0] NCCL INFO Channel 00/0 : 32[7000] -> 48[7000] [receive] via NET/IB/0
+gpub095:2520059:2520136 [0] NCCL INFO Channel 00/0 : 48[7000] -> 32[7000] [send] via NET/IB/0
+gpub095:2520059:2520136 [0] NCCL INFO Channel 00/0 : 56[7000] -> 48[7000] [receive] via NET/IB/0
+gpub095:2520059:2520136 [0] NCCL INFO Channel 01/0 : 52[7000] -> 48[7000] [receive] via NET/IB/0
+gpub095:2520059:2520136 [0] NCCL INFO Connected all trees
+gpub095:2520059:2520136 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub095:2520059:2520136 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub095:2520059:2520136 [0] NCCL INFO comm 0x15bb1c50 rank 48 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE
+gpub095:2520060:2520060 [1] NCCL INFO cudaDriverVersion 12010
+gpub095:2520060:2520060 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.195<0>
+gpub095:2520060:2520060 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub095:2520060:2520135 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.195<0>
+gpub095:2520060:2520135 [1] NCCL INFO Using network IB
+gpub095:2520060:2520135 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000
+gpub095:2520060:2520135 [1] NCCL INFO Trees [0] 50/40/-1->49->48 [1] 50/-1/-1->49->48
+gpub095:2520060:2520135 [1] NCCL INFO Channel 00/0 : 49[46000] -> 50[85000] via P2P/IPC
+gpub095:2520060:2520135 [1] NCCL INFO Channel 01/0 : 49[46000] -> 50[85000] via P2P/IPC
+gpub095:2520060:2520135 [1] NCCL INFO Connected all rings
+gpub095:2520060:2520135 [1] NCCL INFO Channel 00/0 : 40[7000] -> 49[46000] [receive] via NET/IB/0
+gpub095:2520060:2520135 [1] NCCL INFO Channel 00/0 : 49[46000] -> 40[7000] [send] via NET/IB/0
+gpub095:2520060:2520135 [1] NCCL INFO Channel 00/0 : 49[46000] -> 48[7000] via P2P/IPC
+gpub095:2520060:2520135 [1] NCCL INFO Channel 01/0 : 49[46000] -> 48[7000] via P2P/IPC
+gpub095:2520060:2520135 [1] NCCL INFO Connected all trees
+gpub095:2520060:2520135 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub095:2520060:2520135 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub095:2520060:2520135 [1] NCCL INFO comm 0xb4653490 rank 49 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE
+gpub098:1875739:1875739 [1] NCCL INFO cudaDriverVersion 12010
+gpub098:1875739:1875739 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.198<0>
+gpub098:1875739:1875739 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub098:1875739:1875807 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.198<0>
+gpub098:1875739:1875807 [1] NCCL INFO Using network IB
+gpub098:1875739:1875807 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000
+gpub098:1875739:1875807 [1] NCCL INFO Trees [0] 62/-1/-1->61->60 [1] 62/-1/-1->61->60
+gpub098:1875739:1875807 [1] NCCL INFO Channel 00/0 : 61[46000] -> 62[85000] via P2P/IPC
+gpub098:1875739:1875807 [1] NCCL INFO Channel 01/0 : 61[46000] -> 62[85000] via P2P/IPC
+gpub098:1875739:1875807 [1] NCCL INFO Connected all rings
+gpub098:1875739:1875807 [1] NCCL INFO Channel 00/0 : 61[46000] -> 60[7000] via P2P/IPC
+gpub098:1875739:1875807 [1] NCCL INFO Channel 01/0 : 61[46000] -> 60[7000] via P2P/IPC
+gpub098:1875739:1875807 [1] NCCL INFO Connected all trees
+gpub098:1875739:1875807 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub098:1875739:1875807 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub098:1875739:1875807 [1] NCCL INFO comm 0x4ffeee90 rank 61 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE
+gpub098:1875738:1875738 [0] NCCL INFO cudaDriverVersion 12010
+gpub098:1875738:1875738 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.198<0>
+gpub098:1875738:1875738 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub098:1875738:1875809 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.198<0>
+gpub098:1875738:1875809 [0] NCCL INFO Using network IB
+gpub098:1875738:1875809 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000
+gpub098:1875738:1875809 [0] NCCL INFO Trees [0] 61/-1/-1->60->56 [1] 61/28/-1->60->-1
+gpub098:1875738:1875809 [0] NCCL INFO Channel 00/0 : 59[c7000] -> 60[7000] [receive] via NET/IB/0
+gpub098:1875738:1875809 [0] NCCL INFO Channel 01/0 : 59[c7000] -> 60[7000] [receive] via NET/IB/0
+gpub098:1875738:1875809 [0] NCCL INFO Channel 00/0 : 60[7000] -> 61[46000] via P2P/IPC
+gpub098:1875738:1875809 [0] NCCL INFO Channel 01/0 : 60[7000] -> 61[46000] via P2P/IPC
+gpub098:1875738:1875809 [0] NCCL INFO Connected all rings
+gpub098:1875738:1875809 [0] NCCL INFO Channel 00/0 : 56[7000] -> 60[7000] [receive] via NET/IB/0
+gpub098:1875738:1875809 [0] NCCL INFO Channel 01/0 : 28[7000] -> 60[7000] [receive] via NET/IB/0
+gpub098:1875738:1875809 [0] NCCL INFO Channel 01/0 : 60[7000] -> 28[7000] [send] via NET/IB/0
+gpub098:1875738:1875809 [0] NCCL INFO Channel 00/0 : 60[7000] -> 56[7000] [send] via NET/IB/0
+gpub098:1875738:1875809 [0] NCCL INFO Connected all trees
+gpub098:1875738:1875809 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub098:1875738:1875809 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub098:1875738:1875809 [0] NCCL INFO comm 0x9e5ca730 rank 60 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE
+gpub098:1875740:1875740 [2] NCCL INFO cudaDriverVersion 12010
+gpub098:1875740:1875740 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.198<0>
+gpub098:1875740:1875740 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub098:1875740:1875808 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.198<0>
+gpub098:1875740:1875808 [2] NCCL INFO Using network IB
+gpub098:1875740:1875808 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000
+gpub098:1875740:1875808 [2] NCCL INFO Trees [0] 63/-1/-1->62->61 [1] 63/-1/-1->62->61
+gpub098:1875740:1875808 [2] NCCL INFO Channel 00/0 : 62[85000] -> 63[c7000] via P2P/IPC
+gpub098:1875740:1875808 [2] NCCL INFO Channel 01/0 : 62[85000] -> 63[c7000] via P2P/IPC
+gpub098:1875740:1875808 [2] NCCL INFO Connected all rings
+gpub098:1875740:1875808 [2] NCCL INFO Channel 00/0 : 62[85000] -> 61[46000] via P2P/IPC
+gpub098:1875740:1875808 [2] NCCL INFO Channel 01/0 : 62[85000] -> 61[46000] via P2P/IPC
+gpub098:1875740:1875808 [2] NCCL INFO Connected all trees
+gpub098:1875740:1875808 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub098:1875740:1875808 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub098:1875740:1875808 [2] NCCL INFO comm 0x8c9fbb0 rank 62 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE
+gpub084:4052709:4052709 [1] NCCL INFO cudaDriverVersion 12010
+gpub084:4052709:4052709 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.184<0>
+gpub084:4052709:4052709 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub084:4052709:4052793 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.184<0>
+gpub084:4052709:4052793 [1] NCCL INFO Using network IB
+gpub084:4052709:4052793 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000
+gpub084:4052709:4052793 [1] NCCL INFO Trees [0] 46/-1/-1->45->44 [1] 46/52/-1->45->44
+gpub084:4052709:4052793 [1] NCCL INFO Channel 00/0 : 45[46000] -> 46[85000] via P2P/IPC
+gpub084:4052709:4052793 [1] NCCL INFO Channel 01/0 : 45[46000] -> 46[85000] via P2P/IPC
+gpub084:4052709:4052793 [1] NCCL INFO Connected all rings
+gpub084:4052709:4052793 [1] NCCL INFO Channel 01/0 : 45[46000] -> 52[7000] [send] via NET/IB/0
+gpub084:4052709:4052793 [1] NCCL INFO Channel 01/0 : 52[7000] -> 45[46000] [receive] via NET/IB/0
+gpub072:1805521:1805521 [2] NCCL INFO cudaDriverVersion 12010
+gpub072:1805521:1805521 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.172<0>
+gpub072:1805521:1805521 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub072:1805521:1805605 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.172<0>
+gpub072:1805521:1805605 [2] NCCL INFO Using network IB
+gpub072:1805521:1805605 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000
+gpub072:1805521:1805605 [2] NCCL INFO Trees [0] 43/-1/-1->42->41 [1] 43/-1/-1->42->41
+gpub072:1805521:1805605 [2] NCCL INFO Channel 00/0 : 42[85000] -> 43[c7000] via P2P/IPC
+gpub072:1805521:1805605 [2] NCCL INFO Channel 01/0 : 42[85000] -> 43[c7000] via P2P/IPC
+gpub072:1805521:1805605 [2] NCCL INFO Connected all rings
+gpub072:1805521:1805605 [2] NCCL INFO Channel 00/0 : 42[85000] -> 41[46000] via P2P/IPC
+gpub072:1805521:1805605 [2] NCCL INFO Channel 01/0 : 42[85000] -> 41[46000] via P2P/IPC
+gpub084:4052709:4052793 [1] NCCL INFO Channel 00/0 : 45[46000] -> 44[7000] via P2P/IPC
+gpub084:4052709:4052793 [1] NCCL INFO Channel 01/0 : 45[46000] -> 44[7000] via P2P/IPC
+gpub084:4052709:4052793 [1] NCCL INFO Connected all trees
+gpub084:4052709:4052793 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub084:4052709:4052793 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub084:4052709:4052793 [1] NCCL INFO comm 0xd834420 rank 45 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE
+gpub072:1805521:1805605 [2] NCCL INFO Connected all trees
+gpub072:1805521:1805605 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub072:1805521:1805605 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub072:1805521:1805605 [2] NCCL INFO comm 0x8d829e60 rank 42 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE
+gpub067:1574056:1574056 [2] NCCL INFO cudaDriverVersion 12010
+gpub067:1574056:1574056 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.167<0>
+gpub067:1574056:1574056 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub067:1574056:1574133 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.167<0>
+gpub067:1574056:1574133 [2] NCCL INFO Using network IB
+gpub067:1574056:1574133 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000
+gpub067:1574056:1574133 [2] NCCL INFO Trees [0] 39/-1/-1->38->37 [1] 39/-1/-1->38->37
+gpub067:1574056:1574133 [2] NCCL INFO Channel 00/0 : 38[85000] -> 39[c7000] via P2P/IPC
+gpub067:1574056:1574133 [2] NCCL INFO Channel 01/0 : 38[85000] -> 39[c7000] via P2P/IPC
+gpub067:1574056:1574133 [2] NCCL INFO Connected all rings
+gpub067:1574056:1574133 [2] NCCL INFO Channel 00/0 : 38[85000] -> 37[46000] via P2P/IPC
+gpub067:1574056:1574133 [2] NCCL INFO Channel 01/0 : 38[85000] -> 37[46000] via P2P/IPC
+gpub067:1574056:1574133 [2] NCCL INFO Connected all trees
+gpub067:1574056:1574133 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub067:1574056:1574133 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub067:1574056:1574133 [2] NCCL INFO comm 0xb006d7d0 rank 38 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE
+gpub013:1694056:1694056 [3] NCCL INFO cudaDriverVersion 12010
+gpub013:1694056:1694056 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.113<0>
+gpub013:1694056:1694056 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub013:1694056:1694129 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.113<0>
+gpub013:1694056:1694129 [3] NCCL INFO Using network IB
+gpub013:1694056:1694129 [3] NCCL INFO Setting affinity for GPU 3 to ffff
+gpub013:1694056:1694129 [3] NCCL INFO Trees [0] -1/-1/-1->11->10 [1] -1/-1/-1->11->10
+gpub013:1694056:1694129 [3] NCCL INFO Channel 00/0 : 11[c7000] -> 12[7000] [send] via NET/IB/0
+gpub013:1694056:1694129 [3] NCCL INFO Channel 01/0 : 11[c7000] -> 12[7000] [send] via NET/IB/0
+gpub013:1694056:1694129 [3] NCCL INFO Connected all rings
+gpub013:1694056:1694129 [3] NCCL INFO Channel 00/0 : 11[c7000] -> 10[85000] via P2P/IPC
+gpub013:1694056:1694129 [3] NCCL INFO Channel 01/0 : 11[c7000] -> 10[85000] via P2P/IPC
+gpub013:1694056:1694129 [3] NCCL INFO Connected all trees
+gpub013:1694056:1694129 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub013:1694056:1694129 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub013:1694056:1694129 [3] NCCL INFO comm 0x8c00090 rank 11 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE
+gpub040:2093690:2093690 [0] NCCL INFO cudaDriverVersion 12010
+gpub040:2093690:2093690 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.140<0>
+gpub040:2093690:2093690 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub040:2093690:2093772 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.140<0>
+gpub040:2093690:2093772 [0] NCCL INFO Using network IB
+gpub040:2093690:2093772 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000
+gpub040:2093690:2093772 [0] NCCL INFO Trees [0] 29/-1/-1->28->24 [1] 29/12/-1->28->60
+gpub040:2093690:2093772 [0] NCCL INFO Channel 00/0 : 27[c7000] -> 28[7000] [receive] via NET/IB/0
+gpub040:2093690:2093772 [0] NCCL INFO Channel 01/0 : 27[c7000] -> 28[7000] [receive] via NET/IB/0
+gpub040:2093690:2093772 [0] NCCL INFO Channel 00/0 : 28[7000] -> 29[46000] via P2P/IPC
+gpub040:2093690:2093772 [0] NCCL INFO Channel 01/0 : 28[7000] -> 29[46000] via P2P/IPC
+gpub040:2093690:2093772 [0] NCCL INFO Connected all rings
+gpub040:2093690:2093772 [0] NCCL INFO Channel 00/0 : 24[7000] -> 28[7000] [receive] via NET/IB/0
+gpub040:2093690:2093772 [0] NCCL INFO Channel 01/0 : 12[7000] -> 28[7000] [receive] via NET/IB/0
+gpub040:2093690:2093772 [0] NCCL INFO Channel 01/0 : 60[7000] -> 28[7000] [receive] via NET/IB/0
+gpub040:2093690:2093772 [0] NCCL INFO Channel 01/0 : 28[7000] -> 60[7000] [send] via NET/IB/0
+gpub040:2093690:2093772 [0] NCCL INFO Channel 01/0 : 28[7000] -> 12[7000] [send] via NET/IB/0
+gpub040:2093690:2093772 [0] NCCL INFO Channel 00/0 : 28[7000] -> 24[7000] [send] via NET/IB/0
+gpub040:2093690:2093772 [0] NCCL INFO Connected all trees
+gpub040:2093690:2093772 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub040:2093690:2093772 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub040:2093690:2093772 [0] NCCL INFO comm 0xba9dc4d0 rank 28 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE
+gpub014:1495255:1495255 [1] NCCL INFO cudaDriverVersion 12010
+gpub014:1495255:1495255 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.114<0>
+gpub014:1495255:1495255 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub014:1495255:1495331 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.114<0>
+gpub014:1495255:1495331 [1] NCCL INFO Using network IB
+gpub014:1495255:1495331 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000
+gpub014:1495255:1495331 [1] NCCL INFO Trees [0] 14/-1/-1->13->12 [1] 14/20/-1->13->12
+gpub014:1495255:1495331 [1] NCCL INFO Channel 00/0 : 13[46000] -> 14[85000] via P2P/IPC
+gpub014:1495255:1495331 [1] NCCL INFO Channel 01/0 : 13[46000] -> 14[85000] via P2P/IPC
+gpub014:1495255:1495331 [1] NCCL INFO Connected all rings
+gpub014:1495255:1495331 [1] NCCL INFO Channel 01/0 : 13[46000] -> 20[7000] [send] via NET/IB/0
+gpub014:1495255:1495331 [1] NCCL INFO Channel 01/0 : 20[7000] -> 13[46000] [receive] via NET/IB/0
+gpub014:1495255:1495331 [1] NCCL INFO Channel 00/0 : 13[46000] -> 12[7000] via P2P/IPC
+gpub014:1495255:1495331 [1] NCCL INFO Channel 01/0 : 13[46000] -> 12[7000] via P2P/IPC
+gpub014:1495255:1495331 [1] NCCL INFO Connected all trees
+gpub014:1495255:1495331 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub014:1495255:1495331 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub014:1495255:1495331 [1] NCCL INFO comm 0x515d3c50 rank 13 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE
+gpub098:1875741:1875741 [3] NCCL INFO cudaDriverVersion 12010
+gpub098:1875741:1875741 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.198<0>
+gpub098:1875741:1875741 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub098:1875741:1875810 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.198<0>
+gpub098:1875741:1875810 [3] NCCL INFO Using network IB
+gpub098:1875741:1875810 [3] NCCL INFO Setting affinity for GPU 3 to ffff
+gpub098:1875741:1875810 [3] NCCL INFO Trees [0] -1/-1/-1->63->62 [1] -1/-1/-1->63->62
+gpub098:1875741:1875810 [3] NCCL INFO Channel 00/0 : 63[c7000] -> 0[7000] [send] via NET/IB/0
+gpub098:1875741:1875810 [3] NCCL INFO Channel 01/0 : 63[c7000] -> 0[7000] [send] via NET/IB/0
+gpub098:1875741:1875810 [3] NCCL INFO Connected all rings
+gpub098:1875741:1875810 [3] NCCL INFO Channel 00/0 : 63[c7000] -> 62[85000] via P2P/IPC
+gpub098:1875741:1875810 [3] NCCL INFO Channel 01/0 : 63[c7000] -> 62[85000] via P2P/IPC
+gpub098:1875741:1875810 [3] NCCL INFO Connected all trees
+gpub098:1875741:1875810 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub098:1875741:1875810 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub098:1875741:1875810 [3] NCCL INFO comm 0x4ecd4ee0 rank 63 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE
+gpub072:1805520:1805520 [1] NCCL INFO cudaDriverVersion 12010
+gpub072:1805520:1805520 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.172<0>
+gpub072:1805520:1805520 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub072:1805520:1805604 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.172<0>
+gpub072:1805520:1805604 [1] NCCL INFO Using network IB
+gpub072:1805520:1805604 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000
+gpub072:1805520:1805604 [1] NCCL INFO Trees [0] 42/36/-1->41->40 [1] 42/-1/-1->41->40
+gpub072:1805520:1805604 [1] NCCL INFO Channel 00/0 : 41[46000] -> 42[85000] via P2P/IPC
+gpub072:1805520:1805604 [1] NCCL INFO Channel 01/0 : 41[46000] -> 42[85000] via P2P/IPC
+gpub072:1805520:1805604 [1] NCCL INFO Connected all rings
+gpub072:1805520:1805604 [1] NCCL INFO Channel 00/0 : 36[7000] -> 41[46000] [receive] via NET/IB/0
+gpub072:1805520:1805604 [1] NCCL INFO Channel 00/0 : 41[46000] -> 36[7000] [send] via NET/IB/0
+gpub072:1805520:1805604 [1] NCCL INFO Channel 00/0 : 41[46000] -> 40[7000] via P2P/IPC
+gpub072:1805520:1805604 [1] NCCL INFO Channel 01/0 : 41[46000] -> 40[7000] via P2P/IPC
+gpub072:1805520:1805604 [1] NCCL INFO Connected all trees
+gpub072:1805520:1805604 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub072:1805520:1805604 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub072:1805520:1805604 [1] NCCL INFO comm 0xb6f41780 rank 41 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE
+gpub067:1574057:1574057 [3] NCCL INFO cudaDriverVersion 12010
+gpub067:1574057:1574057 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.167<0>
+gpub067:1574057:1574057 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub067:1574057:1574132 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.167<0>
+gpub067:1574057:1574132 [3] NCCL INFO Using network IB
+gpub067:1574057:1574132 [3] NCCL INFO Setting affinity for GPU 3 to ffff
+gpub067:1574057:1574132 [3] NCCL INFO Trees [0] -1/-1/-1->39->38 [1] -1/-1/-1->39->38
+gpub067:1574057:1574132 [3] NCCL INFO Channel 00/0 : 39[c7000] -> 40[7000] [send] via NET/IB/0
+gpub067:1574057:1574132 [3] NCCL INFO Channel 01/0 : 39[c7000] -> 40[7000] [send] via NET/IB/0
+gpub067:1574057:1574132 [3] NCCL INFO Connected all rings
+gpub067:1574057:1574132 [3] NCCL INFO Channel 00/0 : 39[c7000] -> 38[85000] via P2P/IPC
+gpub067:1574057:1574132 [3] NCCL INFO Channel 01/0 : 39[c7000] -> 38[85000] via P2P/IPC
+gpub067:1574057:1574132 [3] NCCL INFO Connected all trees
+gpub067:1574057:1574132 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub067:1574057:1574132 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub067:1574057:1574132 [3] NCCL INFO comm 0x8d973650 rank 39 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE
+gpub014:1495257:1495257 [3] NCCL INFO cudaDriverVersion 12010
+gpub014:1495257:1495257 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.114<0>
+gpub014:1495257:1495257 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub014:1495257:1495328 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.114<0>
+gpub014:1495257:1495328 [3] NCCL INFO Using network IB
+gpub014:1495257:1495328 [3] NCCL INFO Setting affinity for GPU 3 to ffff
+gpub014:1495257:1495328 [3] NCCL INFO Trees [0] -1/-1/-1->15->14 [1] -1/-1/-1->15->14
+gpub014:1495257:1495328 [3] NCCL INFO Channel 00/0 : 15[c7000] -> 16[7000] [send] via NET/IB/0
+gpub014:1495257:1495328 [3] NCCL INFO Channel 01/0 : 15[c7000] -> 16[7000] [send] via NET/IB/0
+gpub014:1495257:1495328 [3] NCCL INFO Connected all rings
+gpub014:1495257:1495328 [3] NCCL INFO Channel 00/0 : 15[c7000] -> 14[85000] via P2P/IPC
+gpub014:1495257:1495328 [3] NCCL INFO Channel 01/0 : 15[c7000] -> 14[85000] via P2P/IPC
+gpub014:1495257:1495328 [3] NCCL INFO Connected all trees
+gpub014:1495257:1495328 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub014:1495257:1495328 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub014:1495257:1495328 [3] NCCL INFO comm 0x946a450 rank 15 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE
+gpub096:1645784:1645784 [0] NCCL INFO cudaDriverVersion 12010
+gpub096:1645784:1645784 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.196<0>
+gpub096:1645784:1645784 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub096:1645784:1645856 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.196<0>
+gpub096:1645784:1645856 [0] NCCL INFO Using network IB
+gpub096:1645784:1645856 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000
+gpub096:1645784:1645856 [0] NCCL INFO Trees [0] 53/-1/-1->52->57 [1] 53/48/-1->52->45
+gpub096:1645784:1645856 [0] NCCL INFO Channel 00/0 : 51[c7000] -> 52[7000] [receive] via NET/IB/0
+gpub096:1645784:1645856 [0] NCCL INFO Channel 01/0 : 51[c7000] -> 52[7000] [receive] via NET/IB/0
+gpub096:1645784:1645856 [0] NCCL INFO Channel 00/0 : 52[7000] -> 53[46000] via P2P/IPC
+gpub096:1645784:1645856 [0] NCCL INFO Channel 01/0 : 52[7000] -> 53[46000] via P2P/IPC
+gpub096:1645784:1645856 [0] NCCL INFO Connected all rings
+gpub096:1645784:1645856 [0] NCCL INFO Channel 01/0 : 48[7000] -> 52[7000] [receive] via NET/IB/0
+gpub096:1645784:1645856 [0] NCCL INFO Channel 00/0 : 52[7000] -> 57[46000] [send] via NET/IB/0
+gpub096:1645784:1645856 [0] NCCL INFO Channel 01/0 : 45[46000] -> 52[7000] [receive] via NET/IB/0
+gpub096:1645784:1645856 [0] NCCL INFO Channel 01/0 : 52[7000] -> 45[46000] [send] via NET/IB/0
+gpub096:1645784:1645856 [0] NCCL INFO Channel 00/0 : 57[46000] -> 52[7000] [receive] via NET/IB/0
+gpub096:1645784:1645856 [0] NCCL INFO Channel 01/0 : 52[7000] -> 48[7000] [send] via NET/IB/0
+gpub096:1645784:1645856 [0] NCCL INFO Connected all trees
+gpub096:1645784:1645856 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub096:1645784:1645856 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub096:1645784:1645856 [0] NCCL INFO comm 0xcdcc14f0 rank 52 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE
+gpub012:1607821:1607821 [3] NCCL INFO cudaDriverVersion 12010
+gpub012:1607821:1607821 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.112<0>
+gpub012:1607821:1607821 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub012:1607821:1607901 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.112<0>
+gpub012:1607821:1607901 [3] NCCL INFO Using network IB
+gpub012:1607821:1607901 [3] NCCL INFO Setting affinity for GPU 3 to ffff
+gpub012:1607821:1607901 [3] NCCL INFO Trees [0] -1/-1/-1->7->6 [1] -1/-1/-1->7->6
+gpub012:1607821:1607901 [3] NCCL INFO Channel 00/0 : 7[c7000] -> 8[7000] [send] via NET/IB/0
+gpub012:1607821:1607901 [3] NCCL INFO Channel 01/0 : 7[c7000] -> 8[7000] [send] via NET/IB/0
+gpub012:1607821:1607901 [3] NCCL INFO Connected all rings
+gpub012:1607821:1607901 [3] NCCL INFO Channel 00/0 : 7[c7000] -> 6[85000] via P2P/IPC
+gpub012:1607821:1607901 [3] NCCL INFO Channel 01/0 : 7[c7000] -> 6[85000] via P2P/IPC
+gpub012:1607821:1607901 [3] NCCL INFO Connected all trees
+gpub012:1607821:1607901 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub012:1607821:1607901 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub012:1607821:1607901 [3] NCCL INFO comm 0x516c3430 rank 7 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE
+gpub041:1527385:1527385 [2] NCCL INFO cudaDriverVersion 12010
+gpub041:1527385:1527385 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.141<0>
+gpub041:1527385:1527385 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub041:1527385:1527462 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.141<0>
+gpub041:1527385:1527462 [2] NCCL INFO Using network IB
+gpub041:1527385:1527462 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000
+gpub041:1527385:1527462 [2] NCCL INFO Trees [0] 35/-1/-1->34->33 [1] 35/-1/-1->34->33
+gpub041:1527385:1527462 [2] NCCL INFO Channel 00/0 : 34[85000] -> 35[c7000] via P2P/IPC
+gpub041:1527385:1527462 [2] NCCL INFO Channel 01/0 : 34[85000] -> 35[c7000] via P2P/IPC
+gpub041:1527385:1527462 [2] NCCL INFO Connected all rings
+gpub041:1527385:1527462 [2] NCCL INFO Channel 00/0 : 34[85000] -> 33[46000] via P2P/IPC
+gpub041:1527385:1527462 [2] NCCL INFO Channel 01/0 : 34[85000] -> 33[46000] via P2P/IPC
+gpub041:1527385:1527462 [2] NCCL INFO Connected all trees
+gpub041:1527385:1527462 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub041:1527385:1527462 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub041:1527385:1527462 [2] NCCL INFO comm 0x5082e9e0 rank 34 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE
+gpub041:1527384:1527384 [1] NCCL INFO cudaDriverVersion 12010
+gpub041:1527384:1527384 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.141<0>
+gpub041:1527384:1527384 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub041:1527384:1527459 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.141<0>
+gpub041:1527384:1527459 [1] NCCL INFO Using network IB
+gpub041:1527384:1527459 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000
+gpub041:1527384:1527459 [1] NCCL INFO Trees [0] 34/16/-1->33->32 [1] 34/-1/-1->33->32
+gpub041:1527384:1527459 [1] NCCL INFO Channel 00/0 : 33[46000] -> 34[85000] via P2P/IPC
+gpub041:1527384:1527459 [1] NCCL INFO Channel 01/0 : 33[46000] -> 34[85000] via P2P/IPC
+gpub041:1527384:1527459 [1] NCCL INFO Connected all rings
+gpub041:1527384:1527459 [1] NCCL INFO Channel 00/0 : 16[7000] -> 33[46000] [receive] via NET/IB/0
+gpub041:1527384:1527459 [1] NCCL INFO Channel 00/0 : 33[46000] -> 16[7000] [send] via NET/IB/0
+gpub041:1527384:1527459 [1] NCCL INFO Channel 00/0 : 33[46000] -> 32[7000] via P2P/IPC
+gpub041:1527384:1527459 [1] NCCL INFO Channel 01/0 : 33[46000] -> 32[7000] via P2P/IPC
+gpub041:1527384:1527459 [1] NCCL INFO Connected all trees
+gpub041:1527384:1527459 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub041:1527384:1527459 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub041:1527384:1527459 [1] NCCL INFO comm 0x512a04d0 rank 33 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE
+gpub041:1527383:1527383 [0] NCCL INFO cudaDriverVersion 12010
+gpub041:1527383:1527383 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.141<0>
+gpub041:1527383:1527383 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub041:1527383:1527461 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.141<0>
+gpub041:1527383:1527461 [0] NCCL INFO Using network IB
+gpub041:1527383:1527461 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000
+gpub041:1527383:1527461 [0] NCCL INFO Trees [0] 33/48/-1->32->0 [1] 33/-1/-1->32->36
+gpub041:1527383:1527461 [0] NCCL INFO Channel 00/0 : 31[c7000] -> 32[7000] [receive] via NET/IB/0
+gpub041:1527383:1527461 [0] NCCL INFO Channel 01/0 : 31[c7000] -> 32[7000] [receive] via NET/IB/0
+gpub041:1527383:1527461 [0] NCCL INFO Channel 00/0 : 32[7000] -> 33[46000] via P2P/IPC
+gpub041:1527383:1527461 [0] NCCL INFO Channel 01/0 : 32[7000] -> 33[46000] via P2P/IPC
+gpub041:1527383:1527461 [0] NCCL INFO Connected all rings
+gpub041:1527383:1527461 [0] NCCL INFO Channel 01/0 : 32[7000] -> 36[7000] [send] via NET/IB/0
+gpub041:1527383:1527461 [0] NCCL INFO Channel 00/0 : 32[7000] -> 48[7000] [send] via NET/IB/0
+gpub041:1527383:1527461 [0] NCCL INFO Channel 00/0 : 0[7000] -> 32[7000] [receive] via NET/IB/0
+gpub041:1527383:1527461 [0] NCCL INFO Channel 00/0 : 32[7000] -> 0[7000] [send] via NET/IB/0
+gpub041:1527383:1527461 [0] NCCL INFO Channel 00/0 : 48[7000] -> 32[7000] [receive] via NET/IB/0
+gpub041:1527383:1527461 [0] NCCL INFO Channel 01/0 : 36[7000] -> 32[7000] [receive] via NET/IB/0
+gpub041:1527383:1527461 [0] NCCL INFO Connected all trees
+gpub041:1527383:1527461 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub041:1527383:1527461 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub041:1527383:1527461 [0] NCCL INFO comm 0x5103b480 rank 32 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE
+gpub018:1650756:1650756 [3] NCCL INFO cudaDriverVersion 12010
+gpub018:1650756:1650756 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.118<0>
+gpub018:1650756:1650756 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub018:1650756:1650832 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.118<0>
+gpub018:1650756:1650832 [3] NCCL INFO Using network IB
+gpub018:1650756:1650832 [3] NCCL INFO Setting affinity for GPU 3 to ffff
+gpub018:1650756:1650832 [3] NCCL INFO Trees [0] -1/-1/-1->19->18 [1] -1/-1/-1->19->18
+gpub018:1650756:1650832 [3] NCCL INFO Channel 00/0 : 19[c7000] -> 20[7000] [send] via NET/IB/0
+gpub018:1650756:1650832 [3] NCCL INFO Channel 01/0 : 19[c7000] -> 20[7000] [send] via NET/IB/0
+gpub018:1650756:1650832 [3] NCCL INFO Connected all rings
+gpub018:1650756:1650832 [3] NCCL INFO Channel 00/0 : 19[c7000] -> 18[85000] via P2P/IPC
+gpub018:1650756:1650832 [3] NCCL INFO Channel 01/0 : 19[c7000] -> 18[85000] via P2P/IPC
+gpub018:1650756:1650832 [3] NCCL INFO Connected all trees
+gpub018:1650756:1650832 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub018:1650756:1650832 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub018:1650756:1650832 [3] NCCL INFO comm 0x8c504da0 rank 19 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE
+gpub084:4052710:4052710 [2] NCCL INFO cudaDriverVersion 12010
+gpub084:4052710:4052710 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.184<0>
+gpub084:4052710:4052710 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub084:4052710:4052796 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.184<0>
+gpub084:4052710:4052796 [2] NCCL INFO Using network IB
+gpub084:4052710:4052796 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000
+gpub084:4052710:4052796 [2] NCCL INFO Trees [0] 47/-1/-1->46->45 [1] 47/-1/-1->46->45
+gpub084:4052710:4052796 [2] NCCL INFO Channel 00/0 : 46[85000] -> 47[c7000] via P2P/IPC
+gpub084:4052710:4052796 [2] NCCL INFO Channel 01/0 : 46[85000] -> 47[c7000] via P2P/IPC
+gpub084:4052710:4052796 [2] NCCL INFO Connected all rings
+gpub084:4052710:4052796 [2] NCCL INFO Channel 00/0 : 46[85000] -> 45[46000] via P2P/IPC
+gpub084:4052710:4052796 [2] NCCL INFO Channel 01/0 : 46[85000] -> 45[46000] via P2P/IPC
+gpub084:4052710:4052796 [2] NCCL INFO Connected all trees
+gpub084:4052710:4052796 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub084:4052710:4052796 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub084:4052710:4052796 [2] NCCL INFO comm 0x4f81fce0 rank 46 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE
+gpub014:1495256:1495256 [2] NCCL INFO cudaDriverVersion 12010
+gpub014:1495256:1495256 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.114<0>
+gpub014:1495256:1495256 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub014:1495256:1495330 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.114<0>
+gpub014:1495256:1495330 [2] NCCL INFO Using network IB
+gpub014:1495256:1495330 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000
+gpub014:1495256:1495330 [2] NCCL INFO Trees [0] 15/-1/-1->14->13 [1] 15/-1/-1->14->13
+gpub014:1495256:1495330 [2] NCCL INFO Channel 00/0 : 14[85000] -> 15[c7000] via P2P/IPC
+gpub014:1495256:1495330 [2] NCCL INFO Channel 01/0 : 14[85000] -> 15[c7000] via P2P/IPC
+gpub014:1495256:1495330 [2] NCCL INFO Connected all rings
+gpub014:1495256:1495330 [2] NCCL INFO Channel 00/0 : 14[85000] -> 13[46000] via P2P/IPC
+gpub014:1495256:1495330 [2] NCCL INFO Channel 01/0 : 14[85000] -> 13[46000] via P2P/IPC
+gpub018:1650755:1650755 [2] NCCL INFO cudaDriverVersion 12010
+gpub018:1650755:1650755 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.118<0>
+gpub018:1650755:1650755 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub018:1650755:1650833 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.118<0>
+gpub018:1650755:1650833 [2] NCCL INFO Using network IB
+gpub018:1650755:1650833 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000
+gpub018:1650755:1650833 [2] NCCL INFO Trees [0] 19/-1/-1->18->17 [1] 19/-1/-1->18->17
+gpub018:1650755:1650833 [2] NCCL INFO Channel 00/0 : 18[85000] -> 19[c7000] via P2P/IPC
+gpub018:1650755:1650833 [2] NCCL INFO Channel 01/0 : 18[85000] -> 19[c7000] via P2P/IPC
+gpub018:1650755:1650833 [2] NCCL INFO Connected all rings
+gpub018:1650755:1650833 [2] NCCL INFO Channel 00/0 : 18[85000] -> 17[46000] via P2P/IPC
+gpub018:1650755:1650833 [2] NCCL INFO Channel 01/0 : 18[85000] -> 17[46000] via P2P/IPC
+gpub014:1495256:1495330 [2] NCCL INFO Connected all trees
+gpub014:1495256:1495330 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub014:1495256:1495330 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub014:1495256:1495330 [2] NCCL INFO comm 0x9f383a90 rank 14 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE
+gpub018:1650755:1650833 [2] NCCL INFO Connected all trees
+gpub018:1650755:1650833 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub018:1650755:1650833 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub018:1650755:1650833 [2] NCCL INFO comm 0x513374c0 rank 18 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE
+gpub040:2093692:2093692 [2] NCCL INFO cudaDriverVersion 12010
+gpub040:2093692:2093692 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.140<0>
+gpub040:2093692:2093692 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub040:2093692:2093775 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.140<0>
+gpub040:2093692:2093775 [2] NCCL INFO Using network IB
+gpub040:2093692:2093775 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000
+gpub040:2093692:2093775 [2] NCCL INFO Trees [0] 31/-1/-1->30->29 [1] 31/-1/-1->30->29
+gpub040:2093692:2093775 [2] NCCL INFO Channel 00/0 : 30[85000] -> 31[c7000] via P2P/IPC
+gpub040:2093692:2093775 [2] NCCL INFO Channel 01/0 : 30[85000] -> 31[c7000] via P2P/IPC
+gpub040:2093692:2093775 [2] NCCL INFO Connected all rings
+gpub040:2093692:2093775 [2] NCCL INFO Channel 00/0 : 30[85000] -> 29[46000] via P2P/IPC
+gpub040:2093692:2093775 [2] NCCL INFO Channel 01/0 : 30[85000] -> 29[46000] via P2P/IPC
+gpub084:4052711:4052711 [3] NCCL INFO cudaDriverVersion 12010
+gpub084:4052711:4052711 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.184<0>
+gpub084:4052711:4052711 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub084:4052711:4052795 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.184<0>
+gpub084:4052711:4052795 [3] NCCL INFO Using network IB
+gpub084:4052711:4052795 [3] NCCL INFO Setting affinity for GPU 3 to ffff
+gpub084:4052711:4052795 [3] NCCL INFO Trees [0] -1/-1/-1->47->46 [1] -1/-1/-1->47->46
+gpub084:4052711:4052795 [3] NCCL INFO Channel 00/0 : 47[c7000] -> 48[7000] [send] via NET/IB/0
+gpub084:4052711:4052795 [3] NCCL INFO Channel 01/0 : 47[c7000] -> 48[7000] [send] via NET/IB/0
+gpub084:4052711:4052795 [3] NCCL INFO Connected all rings
+gpub084:4052711:4052795 [3] NCCL INFO Channel 00/0 : 47[c7000] -> 46[85000] via P2P/IPC
+gpub084:4052711:4052795 [3] NCCL INFO Channel 01/0 : 47[c7000] -> 46[85000] via P2P/IPC
+gpub040:2093692:2093775 [2] NCCL INFO Connected all trees
+gpub040:2093692:2093775 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub040:2093692:2093775 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub040:2093692:2093775 [2] NCCL INFO comm 0x514bd130 rank 30 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE
+gpub084:4052711:4052795 [3] NCCL INFO Connected all trees
+gpub084:4052711:4052795 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub084:4052711:4052795 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub084:4052711:4052795 [3] NCCL INFO comm 0xa5710b50 rank 47 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE
+gpub072:1805519:1805519 [0] NCCL INFO cudaDriverVersion 12010
+gpub072:1805519:1805519 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.172<0>
+gpub072:1805519:1805519 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub072:1805519:1805602 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.172<0>
+gpub072:1805519:1805602 [0] NCCL INFO Using network IB
+gpub072:1805519:1805602 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000
+gpub072:1805519:1805602 [0] NCCL INFO Trees [0] 41/44/-1->40->49 [1] 41/-1/-1->40->37
+gpub072:1805519:1805602 [0] NCCL INFO Channel 00/0 : 39[c7000] -> 40[7000] [receive] via NET/IB/0
+gpub072:1805519:1805602 [0] NCCL INFO Channel 01/0 : 39[c7000] -> 40[7000] [receive] via NET/IB/0
+gpub072:1805519:1805602 [0] NCCL INFO Channel 00/0 : 40[7000] -> 41[46000] via P2P/IPC
+gpub072:1805519:1805602 [0] NCCL INFO Channel 01/0 : 40[7000] -> 41[46000] via P2P/IPC
+gpub072:1805519:1805602 [0] NCCL INFO Connected all rings
+gpub012:1607819:1607819 [1] NCCL INFO cudaDriverVersion 12010
+gpub012:1607819:1607819 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.112<0>
+gpub012:1607819:1607819 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub012:1607819:1607899 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.112<0>
+gpub012:1607819:1607899 [1] NCCL INFO Using network IB
+gpub012:1607819:1607899 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000
+gpub012:1607819:1607899 [1] NCCL INFO Trees [0] 6/-1/-1->5->4 [1] 6/8/-1->5->4
+gpub012:1607819:1607899 [1] NCCL INFO Channel 00/0 : 5[46000] -> 6[85000] via P2P/IPC
+gpub012:1607819:1607899 [1] NCCL INFO Channel 01/0 : 5[46000] -> 6[85000] via P2P/IPC
+gpub012:1607819:1607899 [1] NCCL INFO Connected all rings
+gpub012:1607819:1607899 [1] NCCL INFO Channel 01/0 : 5[46000] -> 8[7000] [send] via NET/IB/0
+gpub012:1607819:1607899 [1] NCCL INFO Channel 01/0 : 8[7000] -> 5[46000] [receive] via NET/IB/0
+gpub072:1805519:1805602 [0] NCCL INFO Channel 01/0 : 37[46000] -> 40[7000] [receive] via NET/IB/0
+gpub072:1805519:1805602 [0] NCCL INFO Channel 00/0 : 40[7000] -> 44[7000] [send] via NET/IB/0
+gpub072:1805519:1805602 [0] NCCL INFO Channel 00/0 : 40[7000] -> 49[46000] [send] via NET/IB/0
+gpub072:1805519:1805602 [0] NCCL INFO Channel 00/0 : 49[46000] -> 40[7000] [receive] via NET/IB/0
+gpub072:1805519:1805602 [0] NCCL INFO Channel 00/0 : 44[7000] -> 40[7000] [receive] via NET/IB/0
+gpub072:1805519:1805602 [0] NCCL INFO Channel 01/0 : 40[7000] -> 37[46000] [send] via NET/IB/0
+gpub072:1805519:1805602 [0] NCCL INFO Connected all trees
+gpub072:1805519:1805602 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub072:1805519:1805602 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub072:1805519:1805602 [0] NCCL INFO comm 0x4fb13ad0 rank 40 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE
+gpub012:1607819:1607899 [1] NCCL INFO Channel 00/0 : 5[46000] -> 4[7000] via P2P/IPC
+gpub012:1607819:1607899 [1] NCCL INFO Channel 01/0 : 5[46000] -> 4[7000] via P2P/IPC
+gpub012:1607819:1607899 [1] NCCL INFO Connected all trees
+gpub012:1607819:1607899 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub012:1607819:1607899 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub012:1607819:1607899 [1] NCCL INFO comm 0xa4ee840 rank 5 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE
+gpub039:2093177:2093177 [2] NCCL INFO cudaDriverVersion 12010
+gpub039:2093177:2093177 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.139<0>
+gpub039:2093177:2093177 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub039:2093177:2093242 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.139<0>
+gpub039:2093177:2093242 [2] NCCL INFO Using network IB
+gpub039:2093177:2093242 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000
+gpub039:2093177:2093242 [2] NCCL INFO Trees [0] 27/-1/-1->26->25 [1] 27/-1/-1->26->25
+gpub039:2093177:2093242 [2] NCCL INFO Channel 00/0 : 26[85000] -> 27[c7000] via P2P/IPC
+gpub039:2093177:2093242 [2] NCCL INFO Channel 01/0 : 26[85000] -> 27[c7000] via P2P/IPC
+gpub039:2093177:2093242 [2] NCCL INFO Connected all rings
+gpub039:2093177:2093242 [2] NCCL INFO Channel 00/0 : 26[85000] -> 25[46000] via P2P/IPC
+gpub039:2093177:2093242 [2] NCCL INFO Channel 01/0 : 26[85000] -> 25[46000] via P2P/IPC
+gpub039:2093177:2093242 [2] NCCL INFO Connected all trees
+gpub039:2093177:2093242 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub039:2093177:2093242 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub039:2093177:2093242 [2] NCCL INFO comm 0xa965b10 rank 26 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE
+gpub097:1705871:1705871 [3] NCCL INFO cudaDriverVersion 12010
+gpub097:1705871:1705871 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.197<0>
+gpub097:1705871:1705871 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub097:1705871:1705957 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.197<0>
+gpub097:1705871:1705957 [3] NCCL INFO Using network IB
+gpub097:1705871:1705957 [3] NCCL INFO Setting affinity for GPU 3 to ffff
+gpub097:1705871:1705957 [3] NCCL INFO Trees [0] -1/-1/-1->59->58 [1] -1/-1/-1->59->58
+gpub097:1705871:1705957 [3] NCCL INFO Channel 00/0 : 59[c7000] -> 60[7000] [send] via NET/IB/0
+gpub097:1705871:1705957 [3] NCCL INFO Channel 01/0 : 59[c7000] -> 60[7000] [send] via NET/IB/0
+gpub097:1705871:1705957 [3] NCCL INFO Connected all rings
+gpub097:1705871:1705957 [3] NCCL INFO Channel 00/0 : 59[c7000] -> 58[85000] via P2P/IPC
+gpub097:1705871:1705957 [3] NCCL INFO Channel 01/0 : 59[c7000] -> 58[85000] via P2P/IPC
+gpub097:1705871:1705957 [3] NCCL INFO Connected all trees
+gpub097:1705871:1705957 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub097:1705871:1705957 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub097:1705871:1705957 [3] NCCL INFO comm 0x94d2db0 rank 59 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE
+gpub097:1705870:1705870 [2] NCCL INFO cudaDriverVersion 12010
+gpub097:1705870:1705870 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.197<0>
+gpub097:1705870:1705870 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub097:1705870:1705956 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.197<0>
+gpub097:1705870:1705956 [2] NCCL INFO Using network IB
+gpub097:1705870:1705956 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000
+gpub097:1705870:1705956 [2] NCCL INFO Trees [0] 59/-1/-1->58->57 [1] 59/-1/-1->58->57
+gpub097:1705870:1705956 [2] NCCL INFO Channel 00/0 : 58[85000] -> 59[c7000] via P2P/IPC
+gpub097:1705870:1705956 [2] NCCL INFO Channel 01/0 : 58[85000] -> 59[c7000] via P2P/IPC
+gpub097:1705870:1705956 [2] NCCL INFO Connected all rings
+gpub097:1705870:1705956 [2] NCCL INFO Channel 00/0 : 58[85000] -> 57[46000] via P2P/IPC
+gpub097:1705870:1705956 [2] NCCL INFO Channel 01/0 : 58[85000] -> 57[46000] via P2P/IPC
+gpub039:2093175:2093175 [0] NCCL INFO cudaDriverVersion 12010
+gpub039:2093175:2093175 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.139<0>
+gpub039:2093175:2093175 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub039:2093175:2093244 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.139<0>
+gpub039:2093175:2093244 [0] NCCL INFO Using network IB
+gpub039:2093175:2093244 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000
+gpub039:2093175:2093244 [0] NCCL INFO Trees [0] 25/28/-1->24->16 [1] 25/-1/-1->24->21
+gpub039:2093175:2093244 [0] NCCL INFO Channel 00/0 : 23[c7000] -> 24[7000] [receive] via NET/IB/0
+gpub039:2093175:2093244 [0] NCCL INFO Channel 01/0 : 23[c7000] -> 24[7000] [receive] via NET/IB/0
+gpub039:2093175:2093244 [0] NCCL INFO Channel 00/0 : 24[7000] -> 25[46000] via P2P/IPC
+gpub039:2093175:2093244 [0] NCCL INFO Channel 01/0 : 24[7000] -> 25[46000] via P2P/IPC
+gpub039:2093175:2093244 [0] NCCL INFO Connected all rings
+gpub097:1705870:1705956 [2] NCCL INFO Connected all trees
+gpub097:1705870:1705956 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub097:1705870:1705956 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub097:1705870:1705956 [2] NCCL INFO comm 0x50f117a0 rank 58 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE
+gpub039:2093175:2093244 [0] NCCL INFO Channel 01/0 : 21[46000] -> 24[7000] [receive] via NET/IB/0
+gpub039:2093175:2093244 [0] NCCL INFO Channel 00/0 : 24[7000] -> 28[7000] [send] via NET/IB/0
+gpub039:2093175:2093244 [0] NCCL INFO Channel 00/0 : 16[7000] -> 24[7000] [receive] via NET/IB/0
+gpub039:2093175:2093244 [0] NCCL INFO Channel 00/0 : 24[7000] -> 16[7000] [send] via NET/IB/0
+gpub039:2093175:2093244 [0] NCCL INFO Channel 00/0 : 28[7000] -> 24[7000] [receive] via NET/IB/0
+gpub039:2093175:2093244 [0] NCCL INFO Channel 01/0 : 24[7000] -> 21[46000] [send] via NET/IB/0
+gpub039:2093175:2093244 [0] NCCL INFO Connected all trees
+gpub039:2093175:2093244 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub039:2093175:2093244 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub039:2093175:2093244 [0] NCCL INFO comm 0xa2cab60 rank 24 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE
+gpub041:1527386:1527386 [3] NCCL INFO cudaDriverVersion 12010
+gpub041:1527386:1527386 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.141<0>
+gpub041:1527386:1527386 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub041:1527386:1527460 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.141<0>
+gpub041:1527386:1527460 [3] NCCL INFO Using network IB
+gpub041:1527386:1527460 [3] NCCL INFO Setting affinity for GPU 3 to ffff
+gpub041:1527386:1527460 [3] NCCL INFO Trees [0] -1/-1/-1->35->34 [1] -1/-1/-1->35->34
+gpub041:1527386:1527460 [3] NCCL INFO Channel 00/0 : 35[c7000] -> 36[7000] [send] via NET/IB/0
+gpub041:1527386:1527460 [3] NCCL INFO Channel 01/0 : 35[c7000] -> 36[7000] [send] via NET/IB/0
+gpub041:1527386:1527460 [3] NCCL INFO Connected all rings
+gpub041:1527386:1527460 [3] NCCL INFO Channel 00/0 : 35[c7000] -> 34[85000] via P2P/IPC
+gpub041:1527386:1527460 [3] NCCL INFO Channel 01/0 : 35[c7000] -> 34[85000] via P2P/IPC
+gpub030:2531969:2531969 [0] NCCL INFO cudaDriverVersion 12010
+gpub030:2531969:2531969 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.130<0>
+gpub030:2531969:2531969 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub030:2531969:2532049 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.130<0>
+gpub030:2531969:2532049 [0] NCCL INFO Using network IB
+gpub030:2531969:2532049 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000
+gpub030:2531969:2532049 [0] NCCL INFO Trees [0] 21/-1/-1->20->25 [1] 21/16/-1->20->13
+gpub030:2531969:2532049 [0] NCCL INFO Channel 00/0 : 19[c7000] -> 20[7000] [receive] via NET/IB/0
+gpub030:2531969:2532049 [0] NCCL INFO Channel 01/0 : 19[c7000] -> 20[7000] [receive] via NET/IB/0
+gpub030:2531969:2532049 [0] NCCL INFO Channel 00/0 : 20[7000] -> 21[46000] via P2P/IPC
+gpub030:2531969:2532049 [0] NCCL INFO Channel 01/0 : 20[7000] -> 21[46000] via P2P/IPC
+gpub030:2531969:2532049 [0] NCCL INFO Connected all rings
+gpub041:1527386:1527460 [3] NCCL INFO Connected all trees
+gpub041:1527386:1527460 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub041:1527386:1527460 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub041:1527386:1527460 [3] NCCL INFO comm 0x4f979490 rank 35 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE
+gpub012:1607818:1607818 [0] NCCL INFO cudaDriverVersion 12010
+gpub012:1607818:1607818 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.112<0>
+gpub012:1607818:1607818 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub012:1607818:1607902 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.112<0>
+gpub012:1607818:1607902 [0] NCCL INFO Using network IB
+gpub012:1607818:1607902 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000
+gpub012:1607818:1607902 [0] NCCL INFO Trees [0] 5/-1/-1->4->9 [1] 5/0/-1->4->12
+gpub012:1607818:1607902 [0] NCCL INFO Channel 00/0 : 3[c7000] -> 4[7000] [receive] via NET/IB/0
+gpub012:1607818:1607902 [0] NCCL INFO Channel 01/0 : 3[c7000] -> 4[7000] [receive] via NET/IB/0
+gpub012:1607818:1607902 [0] NCCL INFO Channel 00/0 : 4[7000] -> 5[46000] via P2P/IPC
+gpub012:1607818:1607902 [0] NCCL INFO Channel 01/0 : 4[7000] -> 5[46000] via P2P/IPC
+gpub012:1607818:1607902 [0] NCCL INFO Connected all rings
+gpub030:2531969:2532049 [0] NCCL INFO Channel 01/0 : 16[7000] -> 20[7000] [receive] via NET/IB/0
+gpub030:2531969:2532049 [0] NCCL INFO Channel 00/0 : 20[7000] -> 25[46000] [send] via NET/IB/0
+gpub030:2531969:2532049 [0] NCCL INFO Channel 01/0 : 13[46000] -> 20[7000] [receive] via NET/IB/0
+gpub030:2531969:2532049 [0] NCCL INFO Channel 01/0 : 20[7000] -> 13[46000] [send] via NET/IB/0
+gpub030:2531969:2532049 [0] NCCL INFO Channel 00/0 : 25[46000] -> 20[7000] [receive] via NET/IB/0
+gpub030:2531969:2532049 [0] NCCL INFO Channel 01/0 : 20[7000] -> 16[7000] [send] via NET/IB/0
+gpub030:2531969:2532049 [0] NCCL INFO Connected all trees
+gpub030:2531969:2532049 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub030:2531969:2532049 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub030:2531969:2532049 [0] NCCL INFO comm 0xb4f6de0 rank 20 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE
+gpub012:1607818:1607902 [0] NCCL INFO Channel 01/0 : 0[7000] -> 4[7000] [receive] via NET/IB/0
+gpub012:1607818:1607902 [0] NCCL INFO Channel 00/0 : 4[7000] -> 9[46000] [send] via NET/IB/0
+gpub012:1607818:1607902 [0] NCCL INFO Channel 01/0 : 4[7000] -> 12[7000] [send] via NET/IB/0
+gpub012:1607818:1607902 [0] NCCL INFO Channel 01/0 : 12[7000] -> 4[7000] [receive] via NET/IB/0
+gpub012:1607818:1607902 [0] NCCL INFO Channel 00/0 : 9[46000] -> 4[7000] [receive] via NET/IB/0
+gpub012:1607818:1607902 [0] NCCL INFO Channel 01/0 : 4[7000] -> 0[7000] [send] via NET/IB/0
+gpub012:1607818:1607902 [0] NCCL INFO Connected all trees
+gpub012:1607818:1607902 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub012:1607818:1607902 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub012:1607818:1607902 [0] NCCL INFO comm 0xa8f3bc80 rank 4 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE
+gpub039:2093178:2093178 [3] NCCL INFO cudaDriverVersion 12010
+gpub039:2093178:2093178 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.139<0>
+gpub039:2093178:2093178 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub039:2093178:2093243 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.139<0>
+gpub039:2093178:2093243 [3] NCCL INFO Using network IB
+gpub039:2093178:2093243 [3] NCCL INFO Setting affinity for GPU 3 to ffff
+gpub039:2093178:2093243 [3] NCCL INFO Trees [0] -1/-1/-1->27->26 [1] -1/-1/-1->27->26
+gpub039:2093178:2093243 [3] NCCL INFO Channel 00/0 : 27[c7000] -> 28[7000] [send] via NET/IB/0
+gpub039:2093178:2093243 [3] NCCL INFO Channel 01/0 : 27[c7000] -> 28[7000] [send] via NET/IB/0
+gpub039:2093178:2093243 [3] NCCL INFO Connected all rings
+gpub039:2093178:2093243 [3] NCCL INFO Channel 00/0 : 27[c7000] -> 26[85000] via P2P/IPC
+gpub039:2093178:2093243 [3] NCCL INFO Channel 01/0 : 27[c7000] -> 26[85000] via P2P/IPC
+gpub039:2093178:2093243 [3] NCCL INFO Connected all trees
+gpub039:2093178:2093243 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub039:2093178:2093243 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub039:2093178:2093243 [3] NCCL INFO comm 0x4fc75960 rank 27 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE
+gpub030:2531970:2531970 [1] NCCL INFO cudaDriverVersion 12010
+gpub030:2531970:2531970 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.130<0>
+gpub030:2531970:2531970 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub030:2531970:2532052 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.130<0>
+gpub030:2531970:2532052 [1] NCCL INFO Using network IB
+gpub030:2531970:2532052 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000
+gpub030:2531970:2532052 [1] NCCL INFO Trees [0] 22/-1/-1->21->20 [1] 22/24/-1->21->20
+gpub030:2531970:2532052 [1] NCCL INFO Channel 00/0 : 21[46000] -> 22[85000] via P2P/IPC
+gpub030:2531970:2532052 [1] NCCL INFO Channel 01/0 : 21[46000] -> 22[85000] via P2P/IPC
+gpub030:2531970:2532052 [1] NCCL INFO Connected all rings
+gpub030:2531970:2532052 [1] NCCL INFO Channel 01/0 : 21[46000] -> 24[7000] [send] via NET/IB/0
+gpub030:2531970:2532052 [1] NCCL INFO Channel 01/0 : 24[7000] -> 21[46000] [receive] via NET/IB/0
+gpub030:2531970:2532052 [1] NCCL INFO Channel 00/0 : 21[46000] -> 20[7000] via P2P/IPC
+gpub030:2531970:2532052 [1] NCCL INFO Channel 01/0 : 21[46000] -> 20[7000] via P2P/IPC
+gpub030:2531970:2532052 [1] NCCL INFO Connected all trees
+gpub030:2531970:2532052 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub030:2531970:2532052 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub030:2531970:2532052 [1] NCCL INFO comm 0x8ebc3340 rank 21 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE
+gpub012:1607820:1607820 [2] NCCL INFO cudaDriverVersion 12010
+gpub012:1607820:1607820 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.112<0>
+gpub012:1607820:1607820 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub012:1607820:1607900 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.112<0>
+gpub012:1607820:1607900 [2] NCCL INFO Using network IB
+gpub012:1607820:1607900 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000
+gpub012:1607820:1607900 [2] NCCL INFO Trees [0] 7/-1/-1->6->5 [1] 7/-1/-1->6->5
+gpub012:1607820:1607900 [2] NCCL INFO Channel 00/0 : 6[85000] -> 7[c7000] via P2P/IPC
+gpub012:1607820:1607900 [2] NCCL INFO Channel 01/0 : 6[85000] -> 7[c7000] via P2P/IPC
+gpub012:1607820:1607900 [2] NCCL INFO Connected all rings
+gpub012:1607820:1607900 [2] NCCL INFO Channel 00/0 : 6[85000] -> 5[46000] via P2P/IPC
+gpub012:1607820:1607900 [2] NCCL INFO Channel 01/0 : 6[85000] -> 5[46000] via P2P/IPC
+gpub072:1805522:1805522 [3] NCCL INFO cudaDriverVersion 12010
+gpub072:1805522:1805522 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.172<0>
+gpub072:1805522:1805522 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub072:1805522:1805603 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.172<0>
+gpub072:1805522:1805603 [3] NCCL INFO Using network IB
+gpub072:1805522:1805603 [3] NCCL INFO Setting affinity for GPU 3 to ffff
+gpub072:1805522:1805603 [3] NCCL INFO Trees [0] -1/-1/-1->43->42 [1] -1/-1/-1->43->42
+gpub072:1805522:1805603 [3] NCCL INFO Channel 00/0 : 43[c7000] -> 44[7000] [send] via NET/IB/0
+gpub072:1805522:1805603 [3] NCCL INFO Channel 01/0 : 43[c7000] -> 44[7000] [send] via NET/IB/0
+gpub072:1805522:1805603 [3] NCCL INFO Connected all rings
+gpub072:1805522:1805603 [3] NCCL INFO Channel 00/0 : 43[c7000] -> 42[85000] via P2P/IPC
+gpub072:1805522:1805603 [3] NCCL INFO Channel 01/0 : 43[c7000] -> 42[85000] via P2P/IPC
+gpub039:2093176:2093176 [1] NCCL INFO cudaDriverVersion 12010
+gpub039:2093176:2093176 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.139<0>
+gpub039:2093176:2093176 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub039:2093176:2093245 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.139<0>
+gpub039:2093176:2093245 [1] NCCL INFO Using network IB
+gpub039:2093176:2093245 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000
+gpub039:2093176:2093245 [1] NCCL INFO Trees [0] 26/20/-1->25->24 [1] 26/-1/-1->25->24
+gpub039:2093176:2093245 [1] NCCL INFO Channel 00/0 : 25[46000] -> 26[85000] via P2P/IPC
+gpub039:2093176:2093245 [1] NCCL INFO Channel 01/0 : 25[46000] -> 26[85000] via P2P/IPC
+gpub039:2093176:2093245 [1] NCCL INFO Connected all rings
+gpub039:2093176:2093245 [1] NCCL INFO Channel 00/0 : 20[7000] -> 25[46000] [receive] via NET/IB/0
+gpub039:2093176:2093245 [1] NCCL INFO Channel 00/0 : 25[46000] -> 20[7000] [send] via NET/IB/0
+gpub012:1607820:1607900 [2] NCCL INFO Connected all trees
+gpub012:1607820:1607900 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub012:1607820:1607900 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub012:1607820:1607900 [2] NCCL INFO comm 0x503f1430 rank 6 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE
+gpub072:1805522:1805603 [3] NCCL INFO Connected all trees
+gpub072:1805522:1805603 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub072:1805522:1805603 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub072:1805522:1805603 [3] NCCL INFO comm 0x50740450 rank 43 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE
+gpub039:2093176:2093245 [1] NCCL INFO Channel 00/0 : 25[46000] -> 24[7000] via P2P/IPC
+gpub039:2093176:2093245 [1] NCCL INFO Channel 01/0 : 25[46000] -> 24[7000] via P2P/IPC
+gpub039:2093176:2093245 [1] NCCL INFO Connected all trees
+gpub039:2093176:2093245 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub039:2093176:2093245 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub039:2093176:2093245 [1] NCCL INFO comm 0xbcbaabd0 rank 25 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE
+gpub030:2531972:2531972 [3] NCCL INFO cudaDriverVersion 12010
+gpub030:2531972:2531972 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.130<0>
+gpub030:2531972:2531972 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub030:2531972:2532050 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.130<0>
+gpub030:2531972:2532050 [3] NCCL INFO Using network IB
+gpub030:2531972:2532050 [3] NCCL INFO Setting affinity for GPU 3 to ffff
+gpub030:2531972:2532050 [3] NCCL INFO Trees [0] -1/-1/-1->23->22 [1] -1/-1/-1->23->22
+gpub030:2531972:2532050 [3] NCCL INFO Channel 00/0 : 23[c7000] -> 24[7000] [send] via NET/IB/0
+gpub030:2531972:2532050 [3] NCCL INFO Channel 01/0 : 23[c7000] -> 24[7000] [send] via NET/IB/0
+gpub030:2531972:2532050 [3] NCCL INFO Connected all rings
+gpub030:2531972:2532050 [3] NCCL INFO Channel 00/0 : 23[c7000] -> 22[85000] via P2P/IPC
+gpub030:2531972:2532050 [3] NCCL INFO Channel 01/0 : 23[c7000] -> 22[85000] via P2P/IPC
+gpub030:2531972:2532050 [3] NCCL INFO Connected all trees
+gpub030:2531972:2532050 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub030:2531972:2532050 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub030:2531972:2532050 [3] NCCL INFO comm 0xa2cf1d0 rank 23 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE
+gpub018:1650754:1650754 [1] NCCL INFO cudaDriverVersion 12010
+gpub018:1650754:1650754 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.118<0>
+gpub018:1650754:1650754 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub018:1650754:1650831 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.118<0>
+gpub018:1650754:1650831 [1] NCCL INFO Using network IB
+gpub018:1650754:1650831 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000
+gpub018:1650754:1650831 [1] NCCL INFO Trees [0] 18/8/-1->17->16 [1] 18/-1/-1->17->16
+gpub018:1650754:1650831 [1] NCCL INFO Channel 00/0 : 17[46000] -> 18[85000] via P2P/IPC
+gpub018:1650754:1650831 [1] NCCL INFO Channel 01/0 : 17[46000] -> 18[85000] via P2P/IPC
+gpub018:1650754:1650831 [1] NCCL INFO Connected all rings
+gpub018:1650754:1650831 [1] NCCL INFO Channel 00/0 : 8[7000] -> 17[46000] [receive] via NET/IB/0
+gpub018:1650754:1650831 [1] NCCL INFO Channel 00/0 : 17[46000] -> 8[7000] [send] via NET/IB/0
+gpub018:1650754:1650831 [1] NCCL INFO Channel 00/0 : 17[46000] -> 16[7000] via P2P/IPC
+gpub018:1650754:1650831 [1] NCCL INFO Channel 01/0 : 17[46000] -> 16[7000] via P2P/IPC
+gpub018:1650754:1650831 [1] NCCL INFO Connected all trees
+gpub018:1650754:1650831 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub018:1650754:1650831 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub018:1650754:1650831 [1] NCCL INFO comm 0xa938d420 rank 17 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE
+gpub096:1645787:1645787 [3] NCCL INFO cudaDriverVersion 12010
+gpub096:1645787:1645787 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.196<0>
+gpub096:1645787:1645787 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub096:1645787:1645858 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.196<0>
+gpub096:1645787:1645858 [3] NCCL INFO Using network IB
+gpub096:1645787:1645858 [3] NCCL INFO Setting affinity for GPU 3 to ffff
+gpub096:1645787:1645858 [3] NCCL INFO Trees [0] -1/-1/-1->55->54 [1] -1/-1/-1->55->54
+gpub096:1645787:1645858 [3] NCCL INFO Channel 00/0 : 55[c7000] -> 56[7000] [send] via NET/IB/0
+gpub096:1645787:1645858 [3] NCCL INFO Channel 01/0 : 55[c7000] -> 56[7000] [send] via NET/IB/0
+gpub096:1645787:1645858 [3] NCCL INFO Connected all rings
+gpub096:1645787:1645858 [3] NCCL INFO Channel 00/0 : 55[c7000] -> 54[85000] via P2P/IPC
+gpub096:1645787:1645858 [3] NCCL INFO Channel 01/0 : 55[c7000] -> 54[85000] via P2P/IPC
+gpub096:1645787:1645858 [3] NCCL INFO Connected all trees
+gpub096:1645787:1645858 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub096:1645787:1645858 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub096:1645787:1645858 [3] NCCL INFO comm 0xb78b6390 rank 55 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE
+gpub018:1650753:1650753 [0] NCCL INFO cudaDriverVersion 12010
+gpub018:1650753:1650753 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.118<0>
+gpub018:1650753:1650753 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub018:1650753:1650834 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.118<0>
+gpub018:1650753:1650834 [0] NCCL INFO Using network IB
+gpub018:1650753:1650834 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000
+gpub018:1650753:1650834 [0] NCCL INFO Trees [0] 17/24/-1->16->33 [1] 17/-1/-1->16->20
+gpub018:1650753:1650834 [0] NCCL INFO Channel 00/0 : 15[c7000] -> 16[7000] [receive] via NET/IB/0
+gpub018:1650753:1650834 [0] NCCL INFO Channel 01/0 : 15[c7000] -> 16[7000] [receive] via NET/IB/0
+gpub018:1650753:1650834 [0] NCCL INFO Channel 00/0 : 16[7000] -> 17[46000] via P2P/IPC
+gpub018:1650753:1650834 [0] NCCL INFO Channel 01/0 : 16[7000] -> 17[46000] via P2P/IPC
+gpub018:1650753:1650834 [0] NCCL INFO Connected all rings
+gpub018:1650753:1650834 [0] NCCL INFO Channel 01/0 : 16[7000] -> 20[7000] [send] via NET/IB/0
+gpub018:1650753:1650834 [0] NCCL INFO Channel 00/0 : 16[7000] -> 24[7000] [send] via NET/IB/0
+gpub018:1650753:1650834 [0] NCCL INFO Channel 00/0 : 16[7000] -> 33[46000] [send] via NET/IB/0
+gpub018:1650753:1650834 [0] NCCL INFO Channel 00/0 : 33[46000] -> 16[7000] [receive] via NET/IB/0
+gpub018:1650753:1650834 [0] NCCL INFO Channel 00/0 : 24[7000] -> 16[7000] [receive] via NET/IB/0
+gpub018:1650753:1650834 [0] NCCL INFO Channel 01/0 : 20[7000] -> 16[7000] [receive] via NET/IB/0
+gpub018:1650753:1650834 [0] NCCL INFO Connected all trees
+gpub018:1650753:1650834 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub018:1650753:1650834 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub018:1650753:1650834 [0] NCCL INFO comm 0x4f7a1b90 rank 16 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE
+gpub097:1705868:1705868 [0] NCCL INFO cudaDriverVersion 12010
+gpub097:1705868:1705868 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.197<0>
+gpub097:1705868:1705868 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub097:1705868:1705958 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.197<0>
+gpub097:1705868:1705958 [0] NCCL INFO Using network IB
+gpub097:1705868:1705958 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000
+gpub097:1705868:1705958 [0] NCCL INFO Trees [0] 57/60/-1->56->48 [1] 57/-1/-1->56->53
+gpub097:1705868:1705958 [0] NCCL INFO Channel 00/0 : 55[c7000] -> 56[7000] [receive] via NET/IB/0
+gpub097:1705868:1705958 [0] NCCL INFO Channel 01/0 : 55[c7000] -> 56[7000] [receive] via NET/IB/0
+gpub097:1705868:1705958 [0] NCCL INFO Channel 00/0 : 56[7000] -> 57[46000] via P2P/IPC
+gpub097:1705868:1705958 [0] NCCL INFO Channel 01/0 : 56[7000] -> 57[46000] via P2P/IPC
+gpub097:1705868:1705958 [0] NCCL INFO Connected all rings
+gpub097:1705868:1705958 [0] NCCL INFO Channel 01/0 : 53[46000] -> 56[7000] [receive] via NET/IB/0
+gpub097:1705868:1705958 [0] NCCL INFO Channel 00/0 : 56[7000] -> 60[7000] [send] via NET/IB/0
+gpub097:1705868:1705958 [0] NCCL INFO Channel 00/0 : 48[7000] -> 56[7000] [receive] via NET/IB/0
+gpub097:1705868:1705958 [0] NCCL INFO Channel 00/0 : 56[7000] -> 48[7000] [send] via NET/IB/0
+gpub097:1705868:1705958 [0] NCCL INFO Channel 00/0 : 60[7000] -> 56[7000] [receive] via NET/IB/0
+gpub097:1705868:1705958 [0] NCCL INFO Channel 01/0 : 56[7000] -> 53[46000] [send] via NET/IB/0
+gpub097:1705868:1705958 [0] NCCL INFO Connected all trees
+gpub097:1705868:1705958 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub097:1705868:1705958 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub097:1705868:1705958 [0] NCCL INFO comm 0x4f565ad0 rank 56 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE
+gpub030:2531971:2531971 [2] NCCL INFO cudaDriverVersion 12010
+gpub030:2531971:2531971 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.130<0>
+gpub030:2531971:2531971 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub030:2531971:2532051 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.130<0>
+gpub030:2531971:2532051 [2] NCCL INFO Using network IB
+gpub030:2531971:2532051 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000
+gpub030:2531971:2532051 [2] NCCL INFO Trees [0] 23/-1/-1->22->21 [1] 23/-1/-1->22->21
+gpub030:2531971:2532051 [2] NCCL INFO Channel 00/0 : 22[85000] -> 23[c7000] via P2P/IPC
+gpub030:2531971:2532051 [2] NCCL INFO Channel 01/0 : 22[85000] -> 23[c7000] via P2P/IPC
+gpub030:2531971:2532051 [2] NCCL INFO Connected all rings
+gpub030:2531971:2532051 [2] NCCL INFO Channel 00/0 : 22[85000] -> 21[46000] via P2P/IPC
+gpub030:2531971:2532051 [2] NCCL INFO Channel 01/0 : 22[85000] -> 21[46000] via P2P/IPC
+gpub030:2531971:2532051 [2] NCCL INFO Connected all trees
+gpub030:2531971:2532051 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub030:2531971:2532051 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub030:2531971:2532051 [2] NCCL INFO comm 0x8dd18cd0 rank 22 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE
+gpub084:4052708:4052708 [0] NCCL INFO cudaDriverVersion 12010
+gpub084:4052708:4052708 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.184<0>
+gpub084:4052708:4052708 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub084:4052708:4052794 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.184<0>
+gpub084:4052708:4052794 [0] NCCL INFO Using network IB
+gpub084:4052708:4052794 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000
+gpub084:4052708:4052794 [0] NCCL INFO Trees [0] 45/-1/-1->44->40 [1] 45/36/-1->44->29
+gpub084:4052708:4052794 [0] NCCL INFO Channel 00/0 : 43[c7000] -> 44[7000] [receive] via NET/IB/0
+gpub084:4052708:4052794 [0] NCCL INFO Channel 01/0 : 43[c7000] -> 44[7000] [receive] via NET/IB/0
+gpub084:4052708:4052794 [0] NCCL INFO Channel 00/0 : 44[7000] -> 45[46000] via P2P/IPC
+gpub084:4052708:4052794 [0] NCCL INFO Channel 01/0 : 44[7000] -> 45[46000] via P2P/IPC
+gpub084:4052708:4052794 [0] NCCL INFO Connected all rings
+gpub084:4052708:4052794 [0] NCCL INFO Channel 00/0 : 40[7000] -> 44[7000] [receive] via NET/IB/0
+gpub084:4052708:4052794 [0] NCCL INFO Channel 01/0 : 36[7000] -> 44[7000] [receive] via NET/IB/0
+gpub084:4052708:4052794 [0] NCCL INFO Channel 01/0 : 29[46000] -> 44[7000] [receive] via NET/IB/0
+gpub084:4052708:4052794 [0] NCCL INFO Channel 01/0 : 44[7000] -> 29[46000] [send] via NET/IB/0
+gpub084:4052708:4052794 [0] NCCL INFO Channel 01/0 : 44[7000] -> 36[7000] [send] via NET/IB/0
+gpub084:4052708:4052794 [0] NCCL INFO Channel 00/0 : 44[7000] -> 40[7000] [send] via NET/IB/0
+gpub084:4052708:4052794 [0] NCCL INFO Connected all trees
+gpub084:4052708:4052794 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub084:4052708:4052794 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub084:4052708:4052794 [0] NCCL INFO comm 0xb576c9d0 rank 44 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE
+gpub096:1645786:1645786 [2] NCCL INFO cudaDriverVersion 12010
+gpub096:1645786:1645786 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.196<0>
+gpub096:1645786:1645786 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub096:1645786:1645857 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.196<0>
+gpub096:1645786:1645857 [2] NCCL INFO Using network IB
+gpub096:1645786:1645857 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000
+gpub096:1645786:1645857 [2] NCCL INFO Trees [0] 55/-1/-1->54->53 [1] 55/-1/-1->54->53
+gpub096:1645786:1645857 [2] NCCL INFO Channel 00/0 : 54[85000] -> 55[c7000] via P2P/IPC
+gpub096:1645786:1645857 [2] NCCL INFO Channel 01/0 : 54[85000] -> 55[c7000] via P2P/IPC
+gpub096:1645786:1645857 [2] NCCL INFO Connected all rings
+gpub096:1645786:1645857 [2] NCCL INFO Channel 00/0 : 54[85000] -> 53[46000] via P2P/IPC
+gpub096:1645786:1645857 [2] NCCL INFO Channel 01/0 : 54[85000] -> 53[46000] via P2P/IPC
+gpub096:1645786:1645857 [2] NCCL INFO Connected all trees
+gpub096:1645786:1645857 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub096:1645786:1645857 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub096:1645786:1645857 [2] NCCL INFO comm 0x4fe9cb90 rank 54 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE
+gpub097:1705869:1705869 [1] NCCL INFO cudaDriverVersion 12010 +gpub097:1705869:1705869 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.197<0> +gpub097:1705869:1705869 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub097:1705869:1705955 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.197<0> +gpub097:1705869:1705955 [1] NCCL INFO Using network IB +gpub097:1705869:1705955 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub097:1705869:1705955 [1] NCCL INFO Trees [0] 58/52/-1->57->56 [1] 58/-1/-1->57->56 +gpub097:1705869:1705955 [1] NCCL INFO Channel 00/0 : 57[46000] -> 58[85000] via P2P/IPC +gpub097:1705869:1705955 [1] NCCL INFO Channel 01/0 : 57[46000] -> 58[85000] via P2P/IPC +gpub097:1705869:1705955 [1] NCCL INFO Connected all rings +gpub097:1705869:1705955 [1] NCCL INFO Channel 00/0 : 52[7000] -> 57[46000] [receive] via NET/IB/0 +gpub097:1705869:1705955 [1] NCCL INFO Channel 00/0 : 57[46000] -> 52[7000] [send] via NET/IB/0 +gpub097:1705869:1705955 [1] NCCL INFO Channel 00/0 : 57[46000] -> 56[7000] via P2P/IPC +gpub097:1705869:1705955 [1] NCCL INFO Channel 01/0 : 57[46000] -> 56[7000] via P2P/IPC +gpub097:1705869:1705955 [1] NCCL INFO Connected all trees +gpub097:1705869:1705955 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub097:1705869:1705955 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub097:1705869:1705955 [1] NCCL INFO comm 0x8e89510 rank 57 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpub040:2093691:2093691 [1] NCCL INFO cudaDriverVersion 12010 +gpub040:2093691:2093691 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.140<0> +gpub040:2093691:2093691 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub040:2093691:2093774 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.140<0> +gpub040:2093691:2093774 [1] NCCL INFO Using network IB +gpub040:2093691:2093774 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub040:2093691:2093774 [1] NCCL INFO Trees [0] 30/-1/-1->29->28 [1] 30/44/-1->29->28 +gpub040:2093691:2093774 [1] NCCL INFO Channel 00/0 : 29[46000] -> 30[85000] via P2P/IPC +gpub040:2093691:2093774 [1] NCCL INFO Channel 01/0 : 29[46000] -> 30[85000] via P2P/IPC +gpub040:2093691:2093774 [1] NCCL INFO Connected all rings +gpub040:2093691:2093774 [1] NCCL INFO Channel 01/0 : 29[46000] -> 44[7000] [send] via NET/IB/0 +gpub040:2093691:2093774 [1] NCCL INFO Channel 01/0 : 44[7000] -> 29[46000] [receive] via NET/IB/0 +gpub040:2093691:2093774 [1] NCCL INFO Channel 00/0 : 29[46000] -> 28[7000] via P2P/IPC +gpub040:2093691:2093774 [1] NCCL INFO Channel 01/0 : 29[46000] -> 28[7000] via P2P/IPC +gpub040:2093691:2093774 [1] NCCL INFO Connected all trees +gpub040:2093691:2093774 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub040:2093691:2093774 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub040:2093691:2093774 [1] NCCL INFO comm 0xb9336880 rank 29 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpub096:1645785:1645785 [1] NCCL INFO cudaDriverVersion 12010 +gpub096:1645785:1645785 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.196<0> +gpub096:1645785:1645785 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub096:1645785:1645855 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.196<0> +gpub096:1645785:1645855 [1] NCCL INFO Using network IB +gpub096:1645785:1645855 [1] NCCL INFO 
Setting affinity for GPU 1 to ffff,00000000 +gpub096:1645785:1645855 [1] NCCL INFO Trees [0] 54/-1/-1->53->52 [1] 54/56/-1->53->52 +gpub096:1645785:1645855 [1] NCCL INFO Channel 00/0 : 53[46000] -> 54[85000] via P2P/IPC +gpub096:1645785:1645855 [1] NCCL INFO Channel 01/0 : 53[46000] -> 54[85000] via P2P/IPC +gpub096:1645785:1645855 [1] NCCL INFO Connected all rings +gpub096:1645785:1645855 [1] NCCL INFO Channel 01/0 : 53[46000] -> 56[7000] [send] via NET/IB/0 +gpub096:1645785:1645855 [1] NCCL INFO Channel 01/0 : 56[7000] -> 53[46000] [receive] via NET/IB/0 +gpub096:1645785:1645855 [1] NCCL INFO Channel 00/0 : 53[46000] -> 52[7000] via P2P/IPC +gpub096:1645785:1645855 [1] NCCL INFO Channel 01/0 : 53[46000] -> 52[7000] via P2P/IPC +gpub096:1645785:1645855 [1] NCCL INFO Connected all trees +gpub096:1645785:1645855 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub096:1645785:1645855 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub096:1645785:1645855 [1] NCCL INFO comm 0x50f7e840 rank 53 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpub014:1495254:1495254 [0] NCCL INFO cudaDriverVersion 12010 +gpub014:1495254:1495254 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.114<0> +gpub014:1495254:1495254 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub014:1495254:1495329 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.114<0> +gpub014:1495254:1495329 [0] NCCL INFO Using network IB +gpub014:1495254:1495329 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub014:1495254:1495329 [0] NCCL INFO Trees [0] 13/-1/-1->12->8 [1] 13/4/-1->12->28 +gpub014:1495254:1495329 [0] NCCL INFO Channel 00/0 : 11[c7000] -> 12[7000] [receive] via NET/IB/0 +gpub014:1495254:1495329 [0] NCCL INFO Channel 01/0 : 11[c7000] -> 12[7000] [receive] via NET/IB/0 +gpub014:1495254:1495329 [0] NCCL INFO Channel 00/0 : 12[7000] -> 13[46000] via P2P/IPC +gpub014:1495254:1495329 [0] NCCL INFO Channel 01/0 : 12[7000] -> 13[46000] via P2P/IPC +gpub014:1495254:1495329 [0] NCCL INFO Connected all rings +gpub014:1495254:1495329 [0] NCCL INFO Channel 00/0 : 8[7000] -> 12[7000] [receive] via NET/IB/0 +gpub014:1495254:1495329 [0] NCCL INFO Channel 01/0 : 4[7000] -> 12[7000] [receive] via NET/IB/0 +gpub014:1495254:1495329 [0] NCCL INFO Channel 01/0 : 12[7000] -> 28[7000] [send] via NET/IB/0 +gpub014:1495254:1495329 [0] NCCL INFO Channel 01/0 : 28[7000] -> 12[7000] [receive] via NET/IB/0 +gpub014:1495254:1495329 [0] NCCL INFO Channel 01/0 : 12[7000] -> 4[7000] [send] via NET/IB/0 +gpub014:1495254:1495329 [0] NCCL INFO Channel 00/0 : 12[7000] -> 8[7000] [send] via NET/IB/0 +gpub014:1495254:1495329 [0] NCCL INFO Connected all trees +gpub014:1495254:1495329 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub014:1495254:1495329 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub014:1495254:1495329 [0] NCCL INFO comm 0x50fe0a80 rank 12 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpub040:2093693:2093693 [3] NCCL INFO cudaDriverVersion 12010 +gpub040:2093693:2093693 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.140<0> +gpub040:2093693:2093693 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub040:2093693:2093773 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.140<0> +gpub040:2093693:2093773 [3] NCCL INFO Using network IB +gpub040:2093693:2093773 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub040:2093693:2093773 
[3] NCCL INFO Trees [0] -1/-1/-1->31->30 [1] -1/-1/-1->31->30 +gpub040:2093693:2093773 [3] NCCL INFO Channel 00/0 : 31[c7000] -> 32[7000] [send] via NET/IB/0 +gpub040:2093693:2093773 [3] NCCL INFO Channel 01/0 : 31[c7000] -> 32[7000] [send] via NET/IB/0 +gpub040:2093693:2093773 [3] NCCL INFO Connected all rings +gpub040:2093693:2093773 [3] NCCL INFO Channel 00/0 : 31[c7000] -> 30[85000] via P2P/IPC +gpub040:2093693:2093773 [3] NCCL INFO Channel 01/0 : 31[c7000] -> 30[85000] via P2P/IPC +gpub040:2093693:2093773 [3] NCCL INFO Connected all trees +gpub040:2093693:2093773 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub040:2093693:2093773 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub040:2093693:2093773 [3] NCCL INFO comm 0xbd6eac10 rank 31 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. 
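The NCCL lines above are per-rank communicator setup: 64 ranks (16 nodes x 4 GPUs) bootstrap over eth1, build ring and tree topologies across nodes via NET/IB (RoCE on mlx5_0), and use P2P/IPC between GPUs on the same node. A minimal sketch of the kind of per-process initialization that emits this output when NCCL_DEBUG=INFO is set; the helper name and the file:// rendezvous path are illustrative placeholders, not ESPnet's actual launcher code:

import os
import torch
import torch.distributed as dist

def init_nccl_process(rank: int, world_size: int = 64) -> None:
    # Illustrative sketch: one process per GPU, global ranks 0..63.
    os.environ.setdefault("NCCL_DEBUG", "INFO")  # makes NCCL print the INFO lines above
    torch.cuda.set_device(rank % 4)              # 4 GPUs per node (cudaDev 0..3)
    # A shared-filesystem rendezvous is assumed here; real setups vary.
    dist.init_process_group(backend="nccl", init_method="file:///tmp/dist_init",
                            world_size=world_size, rank=rank)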
+[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator())
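This reducer.cpp warning is emitted once per rank: DDP was constructed with find_unused_parameters=True, yet every parameter received a gradient, so the extra per-iteration sweep of the autograd graph is wasted work. When the forward pass always touches all parameters, the flag can be dropped. A minimal sketch with a toy module standing in for the real S2T model (not ESPnet's actual trainer wiring; assumes the process group is already initialized):

import torch
from torch.nn.parallel import DistributedDataParallel as DDP

# Toy stand-in for the model; init_process_group() must have run on this rank.
model = torch.nn.Linear(1024, 1024).cuda()
ddp_model = DDP(model, find_unused_parameters=False)  # skips the per-step graph traversal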
+[gpub005:0/64] 2023-07-07 20:14:03,233 (trainer:732) INFO: 23epoch:train:1-100batch: iter_time=1.234, forward_time=0.245, loss_ctc=73.225, loss_att=57.657, acc=0.705, loss=62.327, backward_time=1.047, grad_norm=105.098, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.184, optim0_lr0=7.714e-05, train_time=5.552
+[gpub005:0/64] 2023-07-07 20:16:18,832 (trainer:732) INFO: 23epoch:train:101-200batch: iter_time=1.217e-04, forward_time=0.142, loss_ctc=66.064, loss_att=55.513, acc=0.684, loss=58.678, backward_time=1.027, grad_norm=122.006, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.181, optim0_lr0=7.712e-05, train_time=2.713
+[gpub005:0/64] 2023-07-07 20:18:34,753 (trainer:732) INFO: 23epoch:train:201-300batch: iter_time=1.234e-04, forward_time=0.143, loss_ctc=92.044, loss_att=64.944, acc=0.703, loss=73.074, backward_time=1.025, grad_norm=143.859, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.180, optim0_lr0=7.711e-05, train_time=2.718
+[gpub005:0/64] 2023-07-07 20:20:49,996 (trainer:732) INFO: 23epoch:train:301-400batch: iter_time=1.276e-04, forward_time=0.143, loss_ctc=74.575, loss_att=60.629, acc=0.698, loss=64.813, backward_time=1.025, grad_norm=150.059, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.181, optim0_lr0=7.709e-05, train_time=2.705
+[gpub005:0/64] 2023-07-07 20:23:05,113 (trainer:732) INFO: 23epoch:train:401-500batch: iter_time=1.261e-04, forward_time=0.142, loss_ctc=80.331, loss_att=61.701, acc=0.708, loss=67.290, backward_time=1.024, grad_norm=120.003, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.181, optim0_lr0=7.707e-05, train_time=2.702
+[gpub005:0/64] 2023-07-07 20:25:19,924 (trainer:732) INFO: 23epoch:train:501-600batch: iter_time=1.247e-04, forward_time=0.142, loss_ctc=70.043, loss_att=53.869, acc=0.699, loss=58.721, backward_time=1.021, grad_norm=109.628, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.180, optim0_lr0=7.705e-05, train_time=2.696
+[gpub005:0/64] 2023-07-07 20:27:40,430 (trainer:732) INFO: 23epoch:train:601-700batch: iter_time=1.208e-04, forward_time=0.143, loss_ctc=83.967, loss_att=61.947, acc=0.692, loss=68.553, backward_time=1.033, grad_norm=141.886, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.180, optim0_lr0=7.703e-05, train_time=2.810
+[gpub005:0/64] 2023-07-07 20:30:06,403 (trainer:732) INFO: 23epoch:train:701-800batch: iter_time=1.237e-04, forward_time=0.142, loss_ctc=75.476, loss_att=56.916, acc=0.696, loss=62.484, backward_time=1.034, grad_norm=118.855, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.181, optim0_lr0=7.701e-05, train_time=2.919
+[gpub005:0/64] 2023-07-07 20:31:01,886 (multiple_iter_factory:32) INFO: Building 1th iter-factory...
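Each trainer record above reports the two branches of the hybrid objective separately, and the combined loss is consistent with a 0.3/0.7 CTC/attention interpolation: for the 1-100batch record, 0.3 * 73.225 + 0.7 * 57.657 = 62.327, matching loss=62.327. A sketch of that arithmetic; the 0.3 weight is inferred from the logged values, not read from the config:

# loss = w * loss_ctc + (1 - w) * loss_att, with w inferred as 0.3
w = 0.3
loss_ctc, loss_att = 73.225, 57.657            # 23epoch:train:1-100batch
print(round(w * loss_ctc + (1 - w) * loss_att, 3))  # 62.327, matching loss=62.327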
+[gpub005:0/64] 2023-07-07 20:31:19,436 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub005:0/64] 2023-07-07 20:31:22,919 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"}
+  preprocess: )
+[gpub005:0/64] 2023-07-07 20:31:22,919 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11,
+[gpub005:0/64] 2023-07-07 20:31:22,925 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub005:0/64] 2023-07-07 20:37:15,137 (trainer:732) INFO: 23epoch:train:801-900batch: iter_time=1.375, forward_time=0.172, loss_ctc=72.566, loss_att=53.768, acc=0.698, loss=59.408, backward_time=1.047, grad_norm=114.089, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.182, optim0_lr0=7.700e-05, train_time=8.574
+[gpub005:0/64] 2023-07-07 20:39:31,293 (trainer:732) INFO: 23epoch:train:901-1000batch: iter_time=1.203e-04, forward_time=0.146, loss_ctc=65.125, loss_att=51.474, acc=0.694, loss=55.569, backward_time=1.026, grad_norm=100.552, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.182, optim0_lr0=7.698e-05, train_time=2.724
+[gpub005:0/64] 2023-07-07 20:41:47,156 (trainer:732) INFO: 23epoch:train:1001-1100batch: iter_time=1.348e-04, forward_time=0.146, loss_ctc=83.846, loss_att=65.257, acc=0.696, loss=70.834, backward_time=1.027, grad_norm=117.681, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.181, optim0_lr0=7.696e-05, train_time=2.717
+[gpub005:0/64] 2023-07-07 20:44:02,949 (trainer:732) INFO: 23epoch:train:1101-1200batch: iter_time=1.284e-04, forward_time=0.146, loss_ctc=75.335, loss_att=57.448, acc=0.703, loss=62.814, backward_time=1.027, grad_norm=97.808, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.181, optim0_lr0=7.694e-05, train_time=2.716
+[gpub005:0/64] 2023-07-07 20:46:19,227 (trainer:732) INFO: 23epoch:train:1201-1300batch: iter_time=1.213e-04, forward_time=0.147, loss_ctc=78.643, loss_att=64.187, acc=0.709, loss=68.524, backward_time=1.029, grad_norm=106.806, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.181, optim0_lr0=7.692e-05, train_time=2.725
+[gpub005:0/64] 2023-07-07 20:48:36,388 (trainer:732) INFO: 23epoch:train:1301-1400batch: iter_time=1.198e-04, forward_time=0.146, loss_ctc=67.582, loss_att=50.486, acc=0.715, loss=55.615, backward_time=1.026, grad_norm=134.933, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.181, optim0_lr0=7.690e-05, train_time=2.743
+[gpub005:0/64] 2023-07-07 20:50:52,551 (trainer:732) INFO: 23epoch:train:1401-1500batch: iter_time=1.192e-04, forward_time=0.146, loss_ctc=80.110, loss_att=60.648, acc=0.688, loss=66.487, backward_time=1.028, grad_norm=128.046, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.181, optim0_lr0=7.689e-05, train_time=2.723
+[gpub005:0/64] 2023-07-07 20:53:11,035 (trainer:732) INFO: 23epoch:train:1501-1600batch: iter_time=1.158e-04, forward_time=0.144, loss_ctc=70.534, loss_att=57.154, acc=0.699, loss=61.168, backward_time=1.027, grad_norm=102.436, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.180, optim0_lr0=7.687e-05, train_time=2.769
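The [train] blocks above pair one of the 12 data splits with an UnsortedBatchSampler over its speech_shape key file; with N-batch=37994 at batch_size=128 and sizes ranging 128-129, the leftover utterances are evidently folded into existing batches rather than forming a short final batch. A rough sketch of that batching rule; the remainder policy here is an assumption for illustration, not ESPnet's exact implementation:

def unsorted_batches(keys, batch_size=128):
    # Keys are taken in file order (no length sorting). The remainder is
    # spread one extra utterance per batch, giving mean=128.0, min=128, max=129.
    n_batch, rem = divmod(len(keys), batch_size)
    batches, start = [], 0
    for i in range(n_batch):
        size = batch_size + (1 if i < rem else 0)
        batches.append(keys[start:start + size])
        start += size
    return batches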
+[gpub005:0/64] 2023-07-07 20:54:56,286 (multiple_iter_factory:32) INFO: Building 2th iter-factory...
+[gpub005:0/64] 2023-07-07 20:55:14,037 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub005:0/64] 2023-07-07 20:55:17,531 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"}
+  preprocess: )
+[gpub005:0/64] 2023-07-07 20:55:17,531 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9,
+[gpub005:0/64] 2023-07-07 20:55:17,538 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub005:0/64] 2023-07-07 21:00:02,783 (trainer:732) INFO: 23epoch:train:1601-1700batch: iter_time=2.623, forward_time=0.186, loss_ctc=70.658, loss_att=52.058, acc=0.699, loss=57.638, backward_time=1.034, grad_norm=90.957, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.183, optim0_lr0=7.685e-05, train_time=8.235
+[gpub005:0/64] 2023-07-07 21:02:18,930 (trainer:732) INFO: 23epoch:train:1701-1800batch: iter_time=1.258e-04, forward_time=0.146, loss_ctc=70.962, loss_att=57.990, acc=0.707, loss=61.882, backward_time=1.026, grad_norm=106.925, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.181, optim0_lr0=7.683e-05, train_time=2.723
+[gpub005:0/64] 2023-07-07 21:04:34,887 (trainer:732) INFO: 23epoch:train:1801-1900batch: iter_time=1.197e-04, forward_time=0.143, loss_ctc=73.987, loss_att=55.470, acc=0.695, loss=61.025, backward_time=1.025, grad_norm=128.929, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.181, optim0_lr0=7.681e-05, train_time=2.719
+[gpub005:0/64] 2023-07-07 21:06:50,806 (trainer:732) INFO: 23epoch:train:1901-2000batch: iter_time=1.175e-04, forward_time=0.145, loss_ctc=84.894, loss_att=66.262, acc=0.695, loss=71.851, backward_time=1.024, grad_norm=114.319, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.181, optim0_lr0=7.680e-05, train_time=2.718
+[gpub005:0/64] 2023-07-07 21:09:06,612 (trainer:732) INFO: 23epoch:train:2001-2100batch: iter_time=1.251e-04, forward_time=0.145, loss_ctc=72.489, loss_att=59.553, acc=0.708, loss=63.434, backward_time=1.026, grad_norm=101.371, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.181, optim0_lr0=7.678e-05, train_time=2.716
+[gpub005:0/64] 2023-07-07 21:11:22,119 (trainer:732) INFO: 23epoch:train:2101-2200batch: iter_time=1.171e-04, forward_time=0.144, loss_ctc=71.626, loss_att=54.390, acc=0.708, loss=59.561, backward_time=1.025, grad_norm=107.121, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.181, optim0_lr0=7.676e-05, train_time=2.710
+[gpub005:0/64] 2023-07-07 21:13:37,533 (trainer:732) INFO: 23epoch:train:2201-2300batch: iter_time=1.151e-04, forward_time=0.144, loss_ctc=79.447, loss_att=59.205, acc=0.696, loss=65.278, backward_time=1.023, grad_norm=121.579, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.181, optim0_lr0=7.674e-05, train_time=2.708
+[gpub005:0/64] 2023-07-07 21:15:53,132 (trainer:732) INFO: 23epoch:train:2301-2400batch: iter_time=1.182e-04, forward_time=0.144, loss_ctc=75.627, loss_att=58.332, acc=0.698, loss=63.520, backward_time=1.024, grad_norm=126.568, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.181, optim0_lr0=7.672e-05, train_time=2.712
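loss_scale doubles from 2.951e+20 to 5.903e+20 at the 2001-2100batch record, the signature of dynamic loss scaling under mixed precision: the scale grows by a fixed factor after a run of overflow-free steps and backs off when a step overflows. A sketch with PyTorch's GradScaler; the growth parameters shown are the library defaults, not values read from this run:

import torch

# Doubling 2.951e+20 -> 5.903e+20 is one growth step of dynamic loss scaling.
scaler = torch.cuda.amp.GradScaler(
    growth_factor=2.0,      # scale *= 2 after growth_interval overflow-free steps
    backoff_factor=0.5,     # scale /= 2 when a step produces inf/nan gradients
    growth_interval=2000,
)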
acc=0.698, loss=63.520, backward_time=1.024, grad_norm=126.568, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.181, optim0_lr0=7.672e-05, train_time=2.712 +[gpub005:0/64] 2023-07-07 21:18:08,467 (trainer:732) INFO: 23epoch:train:2401-2500batch: iter_time=1.198e-04, forward_time=0.144, loss_ctc=65.804, loss_att=53.629, acc=0.695, loss=57.282, backward_time=1.023, grad_norm=93.403, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.181, optim0_lr0=7.670e-05, train_time=2.706 +[gpub005:0/64] 2023-07-07 21:18:11,282 (multiple_iter_factory:32) INFO: Building 3th iter-factory... +[gpub005:0/64] 2023-07-07 21:18:29,130 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub005:0/64] 2023-07-07 21:18:32,569 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"} + preprocess: ) +[gpub005:0/64] 2023-07-07 21:18:32,569 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5, +[gpub005:0/64] 2023-07-07 21:18:32,576 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub005:0/64] 2023-07-07 21:24:42,763 (trainer:732) INFO: 23epoch:train:2501-2600batch: iter_time=1.242, forward_time=0.174, loss_ctc=73.983, loss_att=58.373, acc=0.706, loss=63.056, backward_time=1.034, grad_norm=100.591, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.183, optim0_lr0=7.669e-05, train_time=7.886 +[gpub005:0/64] 2023-07-07 21:26:58,297 (trainer:732) INFO: 23epoch:train:2601-2700batch: iter_time=1.190e-04, forward_time=0.144, loss_ctc=64.347, loss_att=52.941, acc=0.692, loss=56.362, backward_time=1.022, grad_norm=103.514, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.181, optim0_lr0=7.667e-05, train_time=2.710 +[gpub005:0/64] 2023-07-07 21:29:14,466 (trainer:732) INFO: 23epoch:train:2701-2800batch: iter_time=1.356e-04, forward_time=0.146, loss_ctc=86.482, loss_att=62.946, acc=0.706, loss=70.007, backward_time=1.028, grad_norm=116.532, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.181, optim0_lr0=7.665e-05, train_time=2.723 +[gpub005:0/64] 2023-07-07 21:31:30,282 (trainer:732) INFO: 23epoch:train:2801-2900batch: iter_time=1.271e-04, forward_time=0.145, loss_ctc=75.022, loss_att=60.111, acc=0.700, loss=64.585, backward_time=1.027, grad_norm=102.153, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.181, optim0_lr0=7.663e-05, train_time=2.716 +[gpub005:0/64] 2023-07-07 21:33:45,972 (trainer:732) INFO: 23epoch:train:2901-3000batch: iter_time=1.233e-04, forward_time=0.144, loss_ctc=79.575, loss_att=61.287, acc=0.710, loss=66.773, backward_time=1.024, grad_norm=116.778, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.181, optim0_lr0=7.661e-05, train_time=2.714 +[gpub005:0/64] 2023-07-07 21:36:01,542 (trainer:732) INFO: 23epoch:train:3001-3100batch: iter_time=1.329e-04, forward_time=0.144, loss_ctc=66.483, loss_att=51.229, acc=0.704, loss=55.805, backward_time=1.024, grad_norm=92.400, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.181, optim0_lr0=7.660e-05, train_time=2.711 +[gpub005:0/64] 2023-07-07 21:38:17,343 
(trainer:732) INFO: 23epoch:train:3101-3200batch: iter_time=1.364e-04, forward_time=0.146, loss_ctc=78.366, loss_att=58.791, acc=0.696, loss=64.664, backward_time=1.027, grad_norm=123.479, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.181, optim0_lr0=7.658e-05, train_time=2.716 +[gpub005:0/64] 2023-07-07 21:40:32,848 (trainer:732) INFO: 23epoch:train:3201-3300batch: iter_time=1.406e-04, forward_time=0.144, loss_ctc=70.759, loss_att=54.241, acc=0.709, loss=59.197, backward_time=1.024, grad_norm=101.288, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.181, optim0_lr0=7.656e-05, train_time=2.710 +[gpub005:0/64] 2023-07-07 21:41:20,432 (multiple_iter_factory:32) INFO: Building 4th iter-factory... +[gpub005:0/64] 2023-07-07 21:41:38,535 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub005:0/64] 2023-07-07 21:41:42,003 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"} + preprocess: ) +[gpub005:0/64] 2023-07-07 21:41:42,004 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7, +[gpub005:0/64] 2023-07-07 21:41:42,010 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub005:0/64] 2023-07-07 21:48:03,969 (trainer:732) INFO: 23epoch:train:3301-3400batch: iter_time=1.232, forward_time=0.144, loss_ctc=68.130, loss_att=52.958, acc=0.698, loss=57.509, backward_time=1.041, grad_norm=100.963, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.180, optim0_lr0=7.654e-05, train_time=9.022 +[gpub005:0/64] 2023-07-07 21:50:20,129 (trainer:732) INFO: 23epoch:train:3401-3500batch: iter_time=1.270e-04, forward_time=0.144, loss_ctc=69.763, loss_att=54.215, acc=0.711, loss=58.879, backward_time=1.024, grad_norm=98.620, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.181, optim0_lr0=7.653e-05, train_time=2.723 +[gpub005:0/64] 2023-07-07 21:52:37,246 (trainer:732) INFO: 23epoch:train:3501-3600batch: iter_time=1.350e-04, forward_time=0.146, loss_ctc=72.406, loss_att=56.129, acc=0.702, loss=61.013, backward_time=1.029, grad_norm=107.814, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.181, optim0_lr0=7.651e-05, train_time=2.742 +[gpub005:0/64] 2023-07-07 21:54:53,208 (trainer:732) INFO: 23epoch:train:3601-3700batch: iter_time=1.206e-04, forward_time=0.145, loss_ctc=85.578, loss_att=64.328, acc=0.701, loss=70.703, backward_time=1.026, grad_norm=95.956, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.181, optim0_lr0=7.649e-05, train_time=2.719 +[gpub005:0/64] 2023-07-07 21:57:09,044 (trainer:732) INFO: 23epoch:train:3701-3800batch: iter_time=1.089e-04, forward_time=0.145, loss_ctc=74.007, loss_att=59.821, acc=0.714, loss=64.077, backward_time=1.026, grad_norm=88.845, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.181, optim0_lr0=7.647e-05, train_time=2.716 +[gpub005:0/64] 2023-07-07 21:59:39,441 (trainer:732) INFO: 23epoch:train:3801-3900batch: iter_time=1.029e-04, forward_time=0.143, loss_ctc=67.054, loss_att=49.799, acc=0.712, loss=54.975, backward_time=1.033, grad_norm=97.210, clip=100.000, 
loss_scale=5.903e+20, optim_step_time=0.181, optim0_lr0=7.645e-05, train_time=3.008 +[gpub005:0/64] 2023-07-07 22:01:56,581 (trainer:732) INFO: 23epoch:train:3901-4000batch: iter_time=1.073e-04, forward_time=0.145, loss_ctc=78.400, loss_att=56.731, acc=0.705, loss=63.232, backward_time=1.027, grad_norm=108.203, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.181, optim0_lr0=7.644e-05, train_time=2.743 +[gpub005:0/64] 2023-07-07 22:04:12,336 (trainer:732) INFO: 23epoch:train:4001-4100batch: iter_time=1.080e-04, forward_time=0.144, loss_ctc=74.332, loss_att=57.767, acc=0.700, loss=62.737, backward_time=1.025, grad_norm=119.370, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.181, optim0_lr0=7.642e-05, train_time=2.715 +[gpub005:0/64] 2023-07-07 22:05:44,331 (multiple_iter_factory:32) INFO: Building 5th iter-factory... +[gpub005:0/64] 2023-07-07 22:06:02,568 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub005:0/64] 2023-07-07 22:06:05,982 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"} + preprocess: ) +[gpub005:0/64] 2023-07-07 22:06:05,982 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10, +[gpub005:0/64] 2023-07-07 22:06:05,988 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub005:0/64] 2023-07-07 22:10:46,670 (trainer:732) INFO: 23epoch:train:4101-4200batch: iter_time=1.259, forward_time=0.154, loss_ctc=66.157, loss_att=48.178, acc=0.703, loss=53.572, backward_time=1.035, grad_norm=89.639, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.181, optim0_lr0=7.640e-05, train_time=7.886 +[gpub005:0/64] 2023-07-07 22:13:02,991 (trainer:732) INFO: 23epoch:train:4201-4300batch: iter_time=1.279e-04, forward_time=0.144, loss_ctc=69.203, loss_att=58.283, acc=0.704, loss=61.559, backward_time=1.026, grad_norm=86.447, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.181, optim0_lr0=7.638e-05, train_time=2.727 +[gpub005:0/64] 2023-07-07 22:15:18,474 (trainer:732) INFO: 23epoch:train:4301-4400batch: iter_time=1.017e-04, forward_time=0.143, loss_ctc=71.569, loss_att=54.837, acc=0.692, loss=59.857, backward_time=1.023, grad_norm=112.955, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.181, optim0_lr0=7.636e-05, train_time=2.709 +[gpub005:0/64] 2023-07-07 22:17:33,912 (trainer:732) INFO: 23epoch:train:4401-4500batch: iter_time=1.015e-04, forward_time=0.143, loss_ctc=83.345, loss_att=66.902, acc=0.686, loss=71.835, backward_time=1.023, grad_norm=107.092, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.181, optim0_lr0=7.635e-05, train_time=2.709 +[gpub005:0/64] 2023-07-07 22:19:49,701 (trainer:732) INFO: 23epoch:train:4501-4600batch: iter_time=1.031e-04, forward_time=0.144, loss_ctc=72.806, loss_att=58.981, acc=0.706, loss=63.129, backward_time=1.024, grad_norm=108.783, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.181, optim0_lr0=7.633e-05, train_time=2.716 +[gpub005:0/64] 2023-07-07 22:22:05,139 (trainer:732) INFO: 23epoch:train:4601-4700batch: iter_time=1.114e-04, 
forward_time=0.143, loss_ctc=71.685, loss_att=55.501, acc=0.700, loss=60.356, backward_time=1.022, grad_norm=110.110, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.181, optim0_lr0=7.631e-05, train_time=2.709 +[gpub005:0/64] 2023-07-07 22:24:20,485 (trainer:732) INFO: 23epoch:train:4701-4800batch: iter_time=1.072e-04, forward_time=0.143, loss_ctc=79.087, loss_att=57.787, acc=0.699, loss=64.177, backward_time=1.021, grad_norm=100.805, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.181, optim0_lr0=7.629e-05, train_time=2.707 +[gpub005:0/64] 2023-07-07 22:26:35,978 (trainer:732) INFO: 23epoch:train:4801-4900batch: iter_time=1.091e-04, forward_time=0.144, loss_ctc=73.503, loss_att=58.364, acc=0.689, loss=62.906, backward_time=1.023, grad_norm=109.600, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.181, optim0_lr0=7.628e-05, train_time=2.710 +[gpub005:0/64] 2023-07-07 22:28:51,076 (trainer:732) INFO: 23epoch:train:4901-5000batch: iter_time=1.202e-04, forward_time=0.143, loss_ctc=64.689, loss_att=54.362, acc=0.690, loss=57.460, backward_time=1.021, grad_norm=152.209, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.181, optim0_lr0=7.626e-05, train_time=2.702 +[gpub005:0/64] 2023-07-07 22:28:55,889 (multiple_iter_factory:32) INFO: Building 6th iter-factory... +[gpub005:0/64] 2023-07-07 22:29:14,276 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub005:0/64] 2023-07-07 22:29:17,689 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"} + preprocess: ) +[gpub005:0/64] 2023-07-07 22:29:17,689 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6, +[gpub005:0/64] 2023-07-07 22:29:17,695 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub005:0/64] 2023-07-07 22:37:00,414 (trainer:732) INFO: 23epoch:train:5001-5100batch: iter_time=1.337, forward_time=0.170, loss_ctc=73.081, loss_att=56.227, acc=0.704, loss=61.283, backward_time=1.034, grad_norm=153.240, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.181, optim0_lr0=7.624e-05, train_time=9.787 +[gpub005:0/64] 2023-07-07 22:39:16,136 (trainer:732) INFO: 23epoch:train:5101-5200batch: iter_time=1.064e-04, forward_time=0.145, loss_ctc=64.449, loss_att=53.017, acc=0.688, loss=56.447, backward_time=1.022, grad_norm=97.208, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.181, optim0_lr0=7.622e-05, train_time=2.714 +[gpub005:0/64] 2023-07-07 22:41:31,530 (trainer:732) INFO: 23epoch:train:5201-5300batch: iter_time=1.159e-04, forward_time=0.144, loss_ctc=84.647, loss_att=63.396, acc=0.696, loss=69.772, backward_time=1.022, grad_norm=129.990, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.181, optim0_lr0=7.620e-05, train_time=2.708 +[gpub005:0/64] 2023-07-07 22:43:47,077 (trainer:732) INFO: 23epoch:train:5301-5400batch: iter_time=1.131e-04, forward_time=0.145, loss_ctc=71.859, loss_att=58.614, acc=0.699, loss=62.588, backward_time=1.024, grad_norm=94.894, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.181, optim0_lr0=7.619e-05, 
train_time=2.711 +[gpub005:0/64] 2023-07-07 22:46:02,833 (trainer:732) INFO: 23epoch:train:5401-5500batch: iter_time=1.209e-04, forward_time=0.146, loss_ctc=77.229, loss_att=59.686, acc=0.711, loss=64.949, backward_time=1.026, grad_norm=107.512, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.182, optim0_lr0=7.617e-05, train_time=2.715 +[gpub005:0/64] 2023-07-07 22:48:18,143 (trainer:732) INFO: 23epoch:train:5501-5600batch: iter_time=1.116e-04, forward_time=0.144, loss_ctc=67.891, loss_att=54.609, acc=0.697, loss=58.593, backward_time=1.023, grad_norm=102.083, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.181, optim0_lr0=7.615e-05, train_time=2.706 +[gpub005:0/64] 2023-07-07 22:50:34,120 (trainer:732) INFO: 23epoch:train:5601-5700batch: iter_time=1.281e-04, forward_time=0.147, loss_ctc=77.159, loss_att=58.325, acc=0.695, loss=63.975, backward_time=1.027, grad_norm=116.273, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.181, optim0_lr0=7.613e-05, train_time=2.719 +[gpub005:0/64] 2023-07-07 22:52:49,277 (trainer:732) INFO: 23epoch:train:5701-5800batch: iter_time=1.458e-04, forward_time=0.144, loss_ctc=71.209, loss_att=54.625, acc=0.696, loss=59.601, backward_time=1.023, grad_norm=92.611, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.181, optim0_lr0=7.612e-05, train_time=2.703 +[gpub005:0/64] 2023-07-07 22:53:48,189 (multiple_iter_factory:32) INFO: Building 7th iter-factory... +[gpub005:0/64] 2023-07-07 22:54:06,512 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub005:0/64] 2023-07-07 22:54:10,013 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"} + preprocess: ) +[gpub005:0/64] 2023-07-07 22:54:10,013 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2, +[gpub005:0/64] 2023-07-07 22:54:10,019 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub005:0/64] 2023-07-07 22:58:51,643 (trainer:732) INFO: 23epoch:train:5801-5900batch: iter_time=2.200, forward_time=0.169, loss_ctc=67.539, loss_att=50.843, acc=0.696, loss=55.852, backward_time=1.035, grad_norm=101.336, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.183, optim0_lr0=7.610e-05, train_time=7.247 +[gpub005:0/64] 2023-07-07 23:01:06,844 (trainer:732) INFO: 23epoch:train:5901-6000batch: iter_time=1.276e-04, forward_time=0.143, loss_ctc=69.923, loss_att=54.611, acc=0.709, loss=59.204, backward_time=1.021, grad_norm=89.503, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.181, optim0_lr0=7.608e-05, train_time=2.704 +[gpub005:0/64] 2023-07-07 23:03:22,210 (trainer:732) INFO: 23epoch:train:6001-6100batch: iter_time=1.177e-04, forward_time=0.143, loss_ctc=73.309, loss_att=56.219, acc=0.695, loss=61.346, backward_time=1.023, grad_norm=108.599, clip=100.000, loss_scale=1.181e+21, optim_step_time=0.181, optim0_lr0=7.606e-05, train_time=2.707 +[gpub005:0/64] 2023-07-07 23:05:37,636 (trainer:732) INFO: 23epoch:train:6101-6200batch: iter_time=1.113e-04, forward_time=0.144, loss_ctc=82.117, loss_att=62.696, acc=0.695, 
loss=68.522, backward_time=1.023, grad_norm=97.686, clip=100.000, loss_scale=1.181e+21, optim_step_time=0.181, optim0_lr0=7.605e-05, train_time=2.708 +[gpub005:0/64] 2023-07-07 23:07:53,122 (trainer:732) INFO: 23epoch:train:6201-6300batch: iter_time=1.115e-04, forward_time=0.143, loss_ctc=75.087, loss_att=60.028, acc=0.711, loss=64.546, backward_time=1.024, grad_norm=97.876, clip=100.000, loss_scale=1.181e+21, optim_step_time=0.181, optim0_lr0=7.603e-05, train_time=2.709 +[gpub005:0/64] 2023-07-07 23:10:08,013 (trainer:732) INFO: 23epoch:train:6301-6400batch: iter_time=1.160e-04, forward_time=0.142, loss_ctc=66.502, loss_att=51.440, acc=0.704, loss=55.959, backward_time=1.020, grad_norm=103.605, clip=100.000, loss_scale=1.181e+21, optim_step_time=0.181, optim0_lr0=7.601e-05, train_time=2.698 +[gpub005:0/64] 2023-07-07 23:12:24,456 (trainer:732) INFO: 23epoch:train:6401-6500batch: iter_time=1.169e-04, forward_time=0.143, loss_ctc=78.370, loss_att=56.212, acc=0.703, loss=62.859, backward_time=1.026, grad_norm=104.276, clip=100.000, loss_scale=1.181e+21, optim_step_time=0.181, optim0_lr0=7.599e-05, train_time=2.729 +[gpub005:0/64] 2023-07-07 23:14:39,768 (trainer:732) INFO: 23epoch:train:6501-6600batch: iter_time=1.029e-04, forward_time=0.143, loss_ctc=75.661, loss_att=58.381, acc=0.689, loss=63.565, backward_time=1.022, grad_norm=120.247, clip=100.000, loss_scale=1.181e+21, optim_step_time=0.180, optim0_lr0=7.598e-05, train_time=2.706 +[gpub005:0/64] 2023-07-07 23:16:16,616 (multiple_iter_factory:32) INFO: Building 8th iter-factory... +[gpub005:0/64] 2023-07-07 23:16:34,573 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub005:0/64] 2023-07-07 23:16:37,987 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"} + preprocess: ) +[gpub005:0/64] 2023-07-07 23:16:37,987 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3, +[gpub005:0/64] 2023-07-07 23:16:37,993 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub005:0/64] 2023-07-07 23:21:16,629 (trainer:732) INFO: 23epoch:train:6601-6700batch: iter_time=1.778, forward_time=0.153, loss_ctc=64.231, loss_att=49.639, acc=0.702, loss=54.016, backward_time=1.033, grad_norm=111.751, clip=100.000, loss_scale=1.181e+21, optim_step_time=0.181, optim0_lr0=7.596e-05, train_time=7.937 +[gpub005:0/64] 2023-07-07 23:23:32,863 (trainer:732) INFO: 23epoch:train:6701-6800batch: iter_time=1.291e-04, forward_time=0.144, loss_ctc=69.473, loss_att=58.857, acc=0.712, loss=62.042, backward_time=1.027, grad_norm=95.279, clip=100.000, loss_scale=1.181e+21, optim_step_time=0.182, optim0_lr0=7.594e-05, train_time=2.724 +[gpub005:0/64] 2023-07-07 23:25:48,593 (trainer:732) INFO: 23epoch:train:6801-6900batch: iter_time=1.190e-04, forward_time=0.145, loss_ctc=67.596, loss_att=53.379, acc=0.699, loss=57.644, backward_time=1.025, grad_norm=117.466, clip=100.000, loss_scale=1.181e+21, optim_step_time=0.182, optim0_lr0=7.592e-05, train_time=2.714 +[gpub005:0/64] 2023-07-07 23:28:04,655 (trainer:732) 
INFO: 23epoch:train:6901-7000batch: iter_time=1.241e-04, forward_time=0.145, loss_ctc=86.976, loss_att=65.598, acc=0.698, loss=72.012, backward_time=1.028, grad_norm=129.974, clip=100.000, loss_scale=1.181e+21, optim_step_time=0.181, optim0_lr0=7.591e-05, train_time=2.721 +[gpub005:0/64] 2023-07-07 23:30:20,292 (trainer:732) INFO: 23epoch:train:7001-7100batch: iter_time=1.137e-04, forward_time=0.145, loss_ctc=67.406, loss_att=49.368, acc=0.731, loss=54.780, backward_time=1.026, grad_norm=112.193, clip=100.000, loss_scale=1.181e+21, optim_step_time=0.181, optim0_lr0=7.589e-05, train_time=2.713 +[gpub005:0/64] 2023-07-07 23:32:36,124 (trainer:732) INFO: 23epoch:train:7101-7200batch: iter_time=1.158e-04, forward_time=0.145, loss_ctc=76.719, loss_att=60.990, acc=0.711, loss=65.708, backward_time=1.026, grad_norm=110.570, clip=100.000, loss_scale=1.181e+21, optim_step_time=0.181, optim0_lr0=7.587e-05, train_time=2.716 +[gpub005:0/64] 2023-07-07 23:34:51,928 (trainer:732) INFO: 23epoch:train:7201-7300batch: iter_time=1.130e-04, forward_time=0.145, loss_ctc=74.869, loss_att=56.094, acc=0.706, loss=61.727, backward_time=1.025, grad_norm=110.042, clip=100.000, loss_scale=1.181e+21, optim_step_time=0.181, optim0_lr0=7.585e-05, train_time=2.716 +[gpub005:0/64] 2023-07-07 23:37:07,590 (trainer:732) INFO: 23epoch:train:7301-7400batch: iter_time=1.190e-04, forward_time=0.144, loss_ctc=73.682, loss_att=57.404, acc=0.694, loss=62.287, backward_time=1.025, grad_norm=131.163, clip=100.000, loss_scale=1.181e+21, optim_step_time=0.181, optim0_lr0=7.584e-05, train_time=2.713 +[gpub005:0/64] 2023-07-07 23:39:22,887 (trainer:732) INFO: 23epoch:train:7401-7500batch: iter_time=1.090e-04, forward_time=0.145, loss_ctc=66.339, loss_att=49.969, acc=0.710, loss=54.880, backward_time=1.023, grad_norm=103.995, clip=100.000, loss_scale=1.181e+21, optim_step_time=0.181, optim0_lr0=7.582e-05, train_time=2.706 +[gpub005:0/64] 2023-07-07 23:39:24,295 (multiple_iter_factory:32) INFO: Building 9th iter-factory... 
+[gpub005:0/64] 2023-07-07 23:39:42,594 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub005:0/64] 2023-07-07 23:39:46,092 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"} + preprocess: ) +[gpub005:0/64] 2023-07-07 23:39:46,092 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0, +[gpub005:0/64] 2023-07-07 23:39:46,098 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub005:0/64] 2023-07-07 23:45:40,419 (trainer:732) INFO: 23epoch:train:7501-7600batch: iter_time=1.251, forward_time=0.163, loss_ctc=71.968, loss_att=56.021, acc=0.710, loss=60.805, backward_time=1.036, grad_norm=115.094, clip=100.000, loss_scale=1.181e+21, optim_step_time=0.182, optim0_lr0=7.580e-05, train_time=7.550 +[gpub005:0/64] 2023-07-07 23:47:56,185 (trainer:732) INFO: 23epoch:train:7601-7700batch: iter_time=1.157e-04, forward_time=0.144, loss_ctc=64.028, loss_att=53.023, acc=0.693, loss=56.324, backward_time=1.023, grad_norm=96.768, clip=100.000, loss_scale=1.181e+21, optim_step_time=0.182, optim0_lr0=7.578e-05, train_time=2.715 +[gpub005:0/64] 2023-07-07 23:50:12,012 (trainer:732) INFO: 23epoch:train:7701-7800batch: iter_time=1.221e-04, forward_time=0.144, loss_ctc=82.929, loss_att=60.230, acc=0.703, loss=67.040, backward_time=1.027, grad_norm=101.875, clip=100.000, loss_scale=1.181e+21, optim_step_time=0.182, optim0_lr0=7.577e-05, train_time=2.716 +[gpub005:0/64] 2023-07-07 23:52:27,130 (trainer:732) INFO: 23epoch:train:7801-7900batch: iter_time=1.379e-04, forward_time=0.144, loss_ctc=73.151, loss_att=59.489, acc=0.697, loss=63.588, backward_time=1.021, grad_norm=94.433, clip=100.000, loss_scale=1.181e+21, optim_step_time=0.181, optim0_lr0=7.575e-05, train_time=2.702 +[gpub005:0/64] 2023-07-07 23:54:42,527 (trainer:732) INFO: 23epoch:train:7901-8000batch: iter_time=1.207e-04, forward_time=0.144, loss_ctc=77.527, loss_att=59.793, acc=0.707, loss=65.113, backward_time=1.024, grad_norm=97.630, clip=100.000, loss_scale=1.181e+21, optim_step_time=0.181, optim0_lr0=7.573e-05, train_time=2.708 +[gpub005:0/64] 2023-07-07 23:56:57,972 (trainer:732) INFO: 23epoch:train:8001-8100batch: iter_time=1.089e-04, forward_time=0.143, loss_ctc=67.062, loss_att=52.670, acc=0.703, loss=56.987, backward_time=1.024, grad_norm=91.986, clip=100.000, loss_scale=1.181e+21, optim_step_time=0.181, optim0_lr0=7.571e-05, train_time=2.709 +[gpub005:0/64] 2023-07-07 23:59:13,305 (trainer:732) INFO: 23epoch:train:8101-8200batch: iter_time=1.038e-04, forward_time=0.144, loss_ctc=77.397, loss_att=56.849, acc=0.699, loss=63.014, backward_time=1.023, grad_norm=101.149, clip=100.000, loss_scale=1.181e+21, optim_step_time=0.182, optim0_lr0=7.570e-05, train_time=2.706 +[gpub005:0/64] 2023-07-08 00:01:28,395 (trainer:732) INFO: 23epoch:train:8201-8300batch: iter_time=1.169e-04, forward_time=0.143, loss_ctc=70.457, loss_att=54.556, acc=0.695, loss=59.327, backward_time=1.022, grad_norm=87.648, clip=100.000, loss_scale=1.181e+21, optim_step_time=0.181, optim0_lr0=7.568e-05, 
train_time=2.702 +[gpub005:0/64] 2023-07-08 00:02:14,379 (multiple_iter_factory:32) INFO: Building 10th iter-factory... +[gpub005:0/64] 2023-07-08 00:02:33,090 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub005:0/64] 2023-07-08 00:02:36,612 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"} + preprocess: ) +[gpub005:0/64] 2023-07-08 00:02:36,612 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8, +[gpub005:0/64] 2023-07-08 00:02:36,618 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub005:0/64] 2023-07-08 00:08:27,251 (trainer:732) INFO: 23epoch:train:8301-8400batch: iter_time=1.283, forward_time=0.155, loss_ctc=69.792, loss_att=51.702, acc=0.705, loss=57.129, backward_time=1.036, grad_norm=94.302, clip=100.000, loss_scale=1.181e+21, optim_step_time=0.183, optim0_lr0=7.566e-05, train_time=8.377 +[gpub005:0/64] 2023-07-08 00:10:43,043 (trainer:732) INFO: 23epoch:train:8401-8500batch: iter_time=1.183e-04, forward_time=0.143, loss_ctc=64.409, loss_att=49.778, acc=0.698, loss=54.167, backward_time=1.024, grad_norm=88.422, clip=100.000, loss_scale=1.181e+21, optim_step_time=0.181, optim0_lr0=7.564e-05, train_time=2.716 +[gpub005:0/64] 2023-07-08 00:12:58,756 (trainer:732) INFO: 23epoch:train:8501-8600batch: iter_time=1.104e-04, forward_time=0.143, loss_ctc=80.753, loss_att=62.383, acc=0.696, loss=67.894, backward_time=1.023, grad_norm=120.710, clip=100.000, loss_scale=1.181e+21, optim_step_time=0.181, optim0_lr0=7.563e-05, train_time=2.714 +[gpub005:0/64] 2023-07-08 00:15:15,421 (trainer:732) INFO: 23epoch:train:8601-8700batch: iter_time=1.104e-04, forward_time=0.145, loss_ctc=72.795, loss_att=55.598, acc=0.703, loss=60.757, backward_time=1.026, grad_norm=94.499, clip=100.000, loss_scale=1.181e+21, optim_step_time=0.182, optim0_lr0=7.561e-05, train_time=2.733 +[gpub005:0/64] 2023-07-08 00:17:31,354 (trainer:732) INFO: 23epoch:train:8701-8800batch: iter_time=1.037e-04, forward_time=0.145, loss_ctc=76.197, loss_att=62.131, acc=0.706, loss=66.351, backward_time=1.028, grad_norm=103.489, clip=100.000, loss_scale=1.181e+21, optim_step_time=0.182, optim0_lr0=7.559e-05, train_time=2.718 +[gpub005:0/64] 2023-07-08 00:19:46,862 (trainer:732) INFO: 23epoch:train:8801-8900batch: iter_time=1.102e-04, forward_time=0.144, loss_ctc=66.628, loss_att=51.943, acc=0.710, loss=56.349, backward_time=1.025, grad_norm=99.280, clip=100.000, loss_scale=1.181e+21, optim_step_time=0.182, optim0_lr0=7.558e-05, train_time=2.710 +[gpub005:0/64] 2023-07-08 00:22:10,051 (trainer:732) INFO: 23epoch:train:8901-9000batch: iter_time=1.057e-04, forward_time=0.145, loss_ctc=76.575, loss_att=56.267, acc=0.696, loss=62.360, backward_time=1.039, grad_norm=108.949, clip=100.000, loss_scale=1.181e+21, optim_step_time=0.182, optim0_lr0=7.556e-05, train_time=2.864 +[gpub005:0/64] 2023-07-08 00:24:25,632 (trainer:732) INFO: 23epoch:train:9001-9100batch: iter_time=9.660e-05, forward_time=0.144, loss_ctc=69.003, loss_att=54.450, acc=0.697, loss=58.816, 
backward_time=1.024, grad_norm=89.585, clip=100.000, loss_scale=1.181e+21, optim_step_time=0.182, optim0_lr0=7.554e-05, train_time=2.711 +[gpub005:0/64] 2023-07-08 00:25:57,557 (multiple_iter_factory:32) INFO: Building 11th iter-factory... +[gpub005:0/64] 2023-07-08 00:26:15,971 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub005:0/64] 2023-07-08 00:26:19,431 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"} + preprocess: ) +[gpub005:0/64] 2023-07-08 00:26:19,432 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4, +[gpub005:0/64] 2023-07-08 00:26:19,438 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub005:0/64] 2023-07-08 00:30:19,119 (trainer:732) INFO: 23epoch:train:9101-9200batch: iter_time=1.292, forward_time=0.171, loss_ctc=67.027, loss_att=52.830, acc=0.700, loss=57.089, backward_time=1.038, grad_norm=110.918, clip=100.000, loss_scale=1.181e+21, optim_step_time=0.182, optim0_lr0=7.552e-05, train_time=7.069 +[gpub005:0/64] 2023-07-08 00:32:58,750 (trainer:732) INFO: 23epoch:train:9201-9300batch: iter_time=1.301e-04, forward_time=0.145, loss_ctc=68.474, loss_att=55.236, acc=0.708, loss=59.208, backward_time=1.045, grad_norm=95.325, clip=100.000, loss_scale=1.181e+21, optim_step_time=0.182, optim0_lr0=7.551e-05, train_time=3.193 +[gpub005:0/64] 2023-07-08 00:35:17,283 (trainer:732) INFO: 23epoch:train:9301-9400batch: iter_time=1.271e-04, forward_time=0.144, loss_ctc=67.455, loss_att=52.020, acc=0.693, loss=56.651, backward_time=1.026, grad_norm=96.518, clip=100.000, loss_scale=1.181e+21, optim_step_time=0.181, optim0_lr0=7.549e-05, train_time=2.770 +[gpub005:0/64] 2023-07-08 00:37:36,935 (trainer:732) INFO: 23epoch:train:9401-9500batch: iter_time=1.194e-04, forward_time=0.146, loss_ctc=85.642, loss_att=64.226, acc=0.699, loss=70.651, backward_time=1.034, grad_norm=105.352, clip=100.000, loss_scale=1.181e+21, optim_step_time=0.181, optim0_lr0=7.547e-05, train_time=2.793 +[gpub005:0/64] 2023-07-08 00:40:08,217 (trainer:732) INFO: 23epoch:train:9501-9600batch: iter_time=1.427e-04, forward_time=0.143, loss_ctc=65.434, loss_att=49.351, acc=0.723, loss=54.176, backward_time=1.050, grad_norm=100.205, clip=100.000, loss_scale=1.181e+21, optim_step_time=0.181, optim0_lr0=7.545e-05, train_time=3.025 +[gpub005:0/64] 2023-07-08 00:42:32,011 (trainer:732) INFO: 23epoch:train:9601-9700batch: iter_time=1.393e-04, forward_time=0.145, loss_ctc=75.234, loss_att=58.800, acc=0.713, loss=63.730, backward_time=1.038, grad_norm=121.320, clip=100.000, loss_scale=1.181e+21, optim_step_time=0.181, optim0_lr0=7.544e-05, train_time=2.876 +[gpub005:0/64] 2023-07-08 00:44:48,244 (trainer:732) INFO: 23epoch:train:9701-9800batch: iter_time=1.236e-04, forward_time=0.143, loss_ctc=74.479, loss_att=58.067, acc=0.696, loss=62.991, backward_time=1.022, grad_norm=101.161, clip=100.000, loss_scale=1.181e+21, optim_step_time=0.181, optim0_lr0=7.542e-05, train_time=2.724 +[gpub005:0/64] 2023-07-08 00:47:03,649 (trainer:732) INFO: 
23epoch:train:9801-9900batch: iter_time=1.282e-04, forward_time=0.144, loss_ctc=72.052, loss_att=55.612, acc=0.692, loss=60.544, backward_time=1.023, grad_norm=96.421, clip=100.000, loss_scale=1.181e+21, optim_step_time=0.181, optim0_lr0=7.540e-05, train_time=2.708 +[gpub005:0/64] 2023-07-08 00:49:18,598 (trainer:732) INFO: 23epoch:train:9901-10000batch: iter_time=1.172e-04, forward_time=0.143, loss_ctc=65.433, loss_att=49.258, acc=0.705, loss=54.110, backward_time=1.020, grad_norm=96.484, clip=100.000, loss_scale=1.181e+21, optim_step_time=0.180, optim0_lr0=7.539e-05, train_time=2.699 +[gpub005:0/64] 2023-07-08 01:02:22,452 (trainer:338) INFO: 23epoch results: [train] iter_time=0.181, forward_time=0.147, loss_ctc=73.369, loss_att=56.779, acc=0.701, loss=61.756, backward_time=1.027, grad_norm=108.052, clip=100.000, loss_scale=7.674e+20, optim_step_time=0.181, optim0_lr0=7.625e-05, train_time=3.358, time=4 hours, 40 minutes and 12.58 seconds, total_count=200000, gpu_max_cached_mem_GB=34.934, [valid] loss_ctc=49.622, cer_ctc=0.280, loss_att=39.678, acc=0.677, cer=0.355, wer=0.989, loss=42.661, time=6 minutes and 51.55 seconds, total_count=20746, gpu_max_cached_mem_GB=38.229, [att_plot] time=5 minutes and 52.75 seconds, total_count=0, gpu_max_cached_mem_GB=38.229 +[gpub005:0/64] 2023-07-08 01:02:38,315 (trainer:386) INFO: The best model has been updated: valid.acc, valid.total_count +[gpub005:0/64] 2023-07-08 01:02:38,323 (trainer:440) INFO: The model files were removed: exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/15epoch.pth +[gpub005:0/64] 2023-07-08 01:02:38,323 (trainer:272) INFO: 24/30epoch started. Estimated time to finish: 1 day, 10 hours and 12 minutes +[gpub005:0/64] 2023-07-08 01:02:38,327 (multiple_iter_factory:32) INFO: Building 0th iter-factory... 
+[gpub005:0/64] 2023-07-08 01:02:56,639 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub005:0/64] 2023-07-08 01:03:00,105 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"} + preprocess: ) +[gpub005:0/64] 2023-07-08 01:03:00,105 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2, +[gpub005:0/64] 2023-07-08 01:03:00,112 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub005:0/64] 2023-07-08 01:07:13,542 (trainer:732) INFO: 24epoch:train:1-100batch: iter_time=1.294, forward_time=0.197, loss_ctc=66.378, loss_att=54.002, acc=0.692, loss=57.715, backward_time=1.047, grad_norm=88.560, clip=100.000, loss_scale=2.361e+21, optim_step_time=0.185, optim0_lr0=7.537e-05, train_time=5.504 +[gpub005:0/64] 2023-07-08 01:09:29,832 (trainer:732) INFO: 24epoch:train:101-200batch: iter_time=1.237e-04, forward_time=0.149, loss_ctc=75.563, loss_att=57.087, acc=0.702, loss=62.630, backward_time=1.029, grad_norm=107.763, clip=100.000, loss_scale=2.361e+21, optim_step_time=0.183, optim0_lr0=7.535e-05, train_time=2.726 +[gpub005:0/64] 2023-07-08 01:11:47,280 (trainer:732) INFO: 24epoch:train:201-300batch: iter_time=1.254e-04, forward_time=0.156, loss_ctc=81.651, loss_att=60.133, acc=0.708, loss=66.588, backward_time=1.031, grad_norm=101.534, clip=100.000, loss_scale=2.361e+21, optim_step_time=0.185, optim0_lr0=7.533e-05, train_time=2.749 +[gpub005:0/64] 2023-07-08 01:14:08,521 (trainer:732) INFO: 24epoch:train:301-400batch: iter_time=1.208e-04, forward_time=0.151, loss_ctc=74.333, loss_att=59.376, acc=0.682, loss=63.863, backward_time=1.045, grad_norm=90.752, clip=100.000, loss_scale=2.361e+21, optim_step_time=0.186, optim0_lr0=7.532e-05, train_time=2.825 +[gpub005:0/64] 2023-07-08 01:16:26,875 (trainer:732) INFO: 24epoch:train:401-500batch: iter_time=1.294e-04, forward_time=0.145, loss_ctc=76.016, loss_att=57.684, acc=0.693, loss=63.183, backward_time=1.029, grad_norm=111.603, clip=100.000, loss_scale=2.361e+21, optim_step_time=0.183, optim0_lr0=7.530e-05, train_time=2.767 +[gpub005:0/64] 2023-07-08 01:18:46,441 (trainer:732) INFO: 24epoch:train:501-600batch: iter_time=1.166e-04, forward_time=0.145, loss_ctc=75.027, loss_att=55.807, acc=0.692, loss=61.573, backward_time=1.033, grad_norm=97.334, clip=100.000, loss_scale=2.361e+21, optim_step_time=0.183, optim0_lr0=7.528e-05, train_time=2.791 +[gpub005:0/64] 2023-07-08 01:21:21,783 (trainer:732) INFO: 24epoch:train:601-700batch: iter_time=1.134e-04, forward_time=0.168, loss_ctc=84.656, loss_att=61.793, acc=0.683, loss=68.652, backward_time=1.043, grad_norm=104.486, clip=100.000, loss_scale=2.361e+21, optim_step_time=0.183, optim0_lr0=7.527e-05, train_time=3.107 +[gpub005:0/64] 2023-07-08 01:23:42,605 (trainer:732) INFO: 24epoch:train:701-800batch: iter_time=1.217e-04, forward_time=0.145, loss_ctc=66.495, loss_att=52.472, acc=0.706, loss=56.679, backward_time=1.032, grad_norm=118.761, clip=100.000, loss_scale=2.361e+21, optim_step_time=0.183, optim0_lr0=7.525e-05, 
train_time=2.816 +[gpub005:0/64] 2023-07-08 01:24:45,692 (multiple_iter_factory:32) INFO: Building 1th iter-factory... +[gpub005:0/64] 2023-07-08 01:25:03,063 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub005:0/64] 2023-07-08 01:25:06,403 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"} + preprocess: ) +[gpub005:0/64] 2023-07-08 01:25:06,403 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9, +[gpub005:0/64] 2023-07-08 01:25:06,447 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub005:0/64] 2023-07-08 01:31:05,985 (trainer:732) INFO: 24epoch:train:801-900batch: iter_time=2.913, forward_time=0.166, loss_ctc=70.482, loss_att=54.429, acc=0.689, loss=59.245, backward_time=1.040, grad_norm=93.942, clip=100.000, loss_scale=2.361e+21, optim_step_time=0.184, optim0_lr0=7.523e-05, train_time=8.867 +[gpub005:0/64] 2023-07-08 01:33:23,595 (trainer:732) INFO: 24epoch:train:901-1000batch: iter_time=1.271e-04, forward_time=0.146, loss_ctc=74.153, loss_att=57.876, acc=0.710, loss=62.759, backward_time=1.032, grad_norm=94.097, clip=100.000, loss_scale=2.361e+21, optim_step_time=0.183, optim0_lr0=7.522e-05, train_time=2.752 +[gpub005:0/64] 2023-07-08 01:35:39,445 (trainer:732) INFO: 24epoch:train:1001-1100batch: iter_time=1.270e-04, forward_time=0.146, loss_ctc=82.574, loss_att=61.700, acc=0.704, loss=67.962, backward_time=1.028, grad_norm=112.668, clip=100.000, loss_scale=2.361e+21, optim_step_time=0.183, optim0_lr0=7.520e-05, train_time=2.717 +[gpub005:0/64] 2023-07-08 01:37:55,479 (trainer:732) INFO: 24epoch:train:1101-1200batch: iter_time=1.277e-04, forward_time=0.147, loss_ctc=72.525, loss_att=54.759, acc=0.693, loss=60.089, backward_time=1.029, grad_norm=96.496, clip=100.000, loss_scale=2.361e+21, optim_step_time=0.183, optim0_lr0=7.518e-05, train_time=2.720 +[gpub005:0/64] 2023-07-08 01:40:11,488 (trainer:732) INFO: 24epoch:train:1201-1300batch: iter_time=1.179e-04, forward_time=0.146, loss_ctc=70.836, loss_att=59.985, acc=0.697, loss=63.240, backward_time=1.029, grad_norm=86.809, clip=100.000, loss_scale=2.361e+21, optim_step_time=0.183, optim0_lr0=7.516e-05, train_time=2.720 +[gpub005:0/64] 2023-07-08 01:42:27,421 (trainer:732) INFO: 24epoch:train:1301-1400batch: iter_time=1.202e-04, forward_time=0.147, loss_ctc=77.993, loss_att=57.963, acc=0.699, loss=63.972, backward_time=1.028, grad_norm=95.498, clip=100.000, loss_scale=2.361e+21, optim_step_time=0.183, optim0_lr0=7.515e-05, train_time=2.718 +[gpub005:0/64] 2023-07-08 01:44:43,520 (trainer:732) INFO: 24epoch:train:1401-1500batch: iter_time=1.235e-04, forward_time=0.148, loss_ctc=81.518, loss_att=62.066, acc=0.696, loss=67.902, backward_time=1.030, grad_norm=106.767, clip=100.000, loss_scale=2.361e+21, optim_step_time=0.183, optim0_lr0=7.513e-05, train_time=2.722 +[gpub005:0/64] 2023-07-08 01:46:59,478 (trainer:732) INFO: 24epoch:train:1501-1600batch: iter_time=1.160e-04, forward_time=0.147, loss_ctc=66.753, loss_att=53.187, acc=0.700, loss=57.257, 
backward_time=1.028, grad_norm=93.450, clip=100.000, loss_scale=2.361e+21, optim_step_time=0.184, optim0_lr0=7.511e-05, train_time=2.719 +[gpub005:0/64] 2023-07-08 01:48:35,879 (multiple_iter_factory:32) INFO: Building 2th iter-factory... +[gpub005:0/64] 2023-07-08 01:48:54,179 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub005:0/64] 2023-07-08 01:48:57,625 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"} + preprocess: ) +[gpub005:0/64] 2023-07-08 01:48:57,626 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8, +[gpub005:0/64] 2023-07-08 01:48:57,632 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub005:0/64] 2023-07-08 01:52:29,456 (trainer:732) INFO: 24epoch:train:1601-1700batch: iter_time=1.330, forward_time=0.147, loss_ctc=65.681, loss_att=48.919, acc=0.702, loss=53.948, backward_time=1.042, grad_norm=84.444, clip=100.000, loss_scale=2.361e+21, optim_step_time=0.183, optim0_lr0=7.510e-05, train_time=6.599 +[gpub005:0/64] 2023-07-08 01:54:45,379 (trainer:732) INFO: 24epoch:train:1701-1800batch: iter_time=1.307e-04, forward_time=0.145, loss_ctc=74.623, loss_att=57.753, acc=0.695, loss=62.814, backward_time=1.027, grad_norm=97.777, clip=100.000, loss_scale=2.361e+21, optim_step_time=0.183, optim0_lr0=7.508e-05, train_time=2.718 +[gpub005:0/64] 2023-07-08 01:57:01,045 (trainer:732) INFO: 24epoch:train:1801-1900batch: iter_time=1.223e-04, forward_time=0.145, loss_ctc=76.935, loss_att=56.373, acc=0.708, loss=62.542, backward_time=1.026, grad_norm=120.929, clip=100.000, loss_scale=2.361e+21, optim_step_time=0.183, optim0_lr0=7.506e-05, train_time=2.713 +[gpub005:0/64] 2023-07-08 01:59:16,723 (trainer:732) INFO: 24epoch:train:1901-2000batch: iter_time=1.132e-04, forward_time=0.147, loss_ctc=71.092, loss_att=51.859, acc=0.702, loss=57.629, backward_time=1.027, grad_norm=100.553, clip=100.000, loss_scale=2.361e+21, optim_step_time=0.183, optim0_lr0=7.505e-05, train_time=2.713 +[gpub005:0/64] 2023-07-08 02:01:32,452 (trainer:732) INFO: 24epoch:train:2001-2100batch: iter_time=1.238e-04, forward_time=0.146, loss_ctc=72.789, loss_att=61.846, acc=0.686, loss=65.129, backward_time=1.026, grad_norm=98.884, clip=100.000, loss_scale=2.361e+21, optim_step_time=0.183, optim0_lr0=7.503e-05, train_time=2.714 +[gpub005:0/64] 2023-07-08 02:03:48,170 (trainer:732) INFO: 24epoch:train:2101-2200batch: iter_time=1.204e-04, forward_time=0.145, loss_ctc=78.081, loss_att=55.115, acc=0.697, loss=62.005, backward_time=1.027, grad_norm=100.800, clip=100.000, loss_scale=2.361e+21, optim_step_time=0.183, optim0_lr0=7.501e-05, train_time=2.714 +[gpub005:0/64] 2023-07-08 02:06:03,949 (trainer:732) INFO: 24epoch:train:2201-2300batch: iter_time=1.177e-04, forward_time=0.146, loss_ctc=80.049, loss_att=63.948, acc=0.678, loss=68.779, backward_time=1.026, grad_norm=114.705, clip=100.000, loss_scale=2.361e+21, optim_step_time=0.183, optim0_lr0=7.499e-05, train_time=2.715 +[gpub005:0/64] 2023-07-08 02:08:19,737 (trainer:732) INFO: 
24epoch:train:2301-2400batch: iter_time=1.194e-04, forward_time=0.146, loss_ctc=65.402, loss_att=53.461, acc=0.704, loss=57.043, backward_time=1.028, grad_norm=91.034, clip=100.000, loss_scale=2.361e+21, optim_step_time=0.183, optim0_lr0=7.498e-05, train_time=2.716 +[gpub005:0/64] 2023-07-08 02:10:35,472 (trainer:732) INFO: 24epoch:train:2401-2500batch: iter_time=1.229e-04, forward_time=0.146, loss_ctc=72.613, loss_att=52.477, acc=0.698, loss=58.518, backward_time=1.026, grad_norm=99.117, clip=100.000, loss_scale=2.361e+21, optim_step_time=0.183, optim0_lr0=7.496e-05, train_time=2.714 +[gpub005:0/64] 2023-07-08 02:10:37,997 (multiple_iter_factory:32) INFO: Building 3th iter-factory... +[gpub005:0/64] 2023-07-08 02:10:56,202 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub005:0/64] 2023-07-08 02:10:59,652 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"} + preprocess: ) +[gpub005:0/64] 2023-07-08 02:10:59,652 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0, +[gpub005:0/64] 2023-07-08 02:10:59,658 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub005:0/64] 2023-07-08 02:16:07,690 (trainer:732) INFO: 24epoch:train:2501-2600batch: iter_time=1.217, forward_time=0.174, loss_ctc=63.381, loss_att=52.778, acc=0.701, loss=55.959, backward_time=1.043, grad_norm=89.376, clip=100.000, loss_scale=2.361e+21, optim_step_time=0.183, optim0_lr0=7.494e-05, train_time=6.644 +[gpub005:0/64] 2023-07-08 02:18:23,643 (trainer:732) INFO: 24epoch:train:2601-2700batch: iter_time=1.350e-04, forward_time=0.146, loss_ctc=75.419, loss_att=55.105, acc=0.702, loss=61.199, backward_time=1.026, grad_norm=92.859, clip=100.000, loss_scale=2.361e+21, optim_step_time=0.183, optim0_lr0=7.493e-05, train_time=2.719 +[gpub005:0/64] 2023-07-08 02:20:39,387 (trainer:732) INFO: 24epoch:train:2701-2800batch: iter_time=1.200e-04, forward_time=0.145, loss_ctc=80.075, loss_att=59.644, acc=0.711, loss=65.773, backward_time=1.028, grad_norm=92.749, clip=100.000, loss_scale=2.361e+21, optim_step_time=0.183, optim0_lr0=7.491e-05, train_time=2.715 +[gpub005:0/64] 2023-07-08 02:22:55,022 (trainer:732) INFO: 24epoch:train:2801-2900batch: iter_time=1.190e-04, forward_time=0.145, loss_ctc=74.639, loss_att=60.582, acc=0.683, loss=64.799, backward_time=1.027, grad_norm=100.800, clip=100.000, loss_scale=2.361e+21, optim_step_time=0.183, optim0_lr0=7.489e-05, train_time=2.712 +[gpub005:0/64] 2023-07-08 02:25:10,817 (trainer:732) INFO: 24epoch:train:2901-3000batch: iter_time=1.153e-04, forward_time=0.147, loss_ctc=72.616, loss_att=55.817, acc=0.697, loss=60.856, backward_time=1.027, grad_norm=106.716, clip=100.000, loss_scale=2.361e+21, optim_step_time=0.183, optim0_lr0=7.488e-05, train_time=2.716 +[gpub005:0/64] 2023-07-08 02:27:26,850 (trainer:732) INFO: 24epoch:train:3001-3100batch: iter_time=1.242e-04, forward_time=0.147, loss_ctc=73.982, loss_att=56.264, acc=0.692, loss=61.579, backward_time=1.030, grad_norm=106.373, clip=100.000, loss_scale=2.361e+21, 
optim_step_time=0.183, optim0_lr0=7.486e-05, train_time=2.720 +[gpub005:0/64] 2023-07-08 02:29:42,652 (trainer:732) INFO: 24epoch:train:3101-3200batch: iter_time=1.253e-04, forward_time=0.146, loss_ctc=79.191, loss_att=60.052, acc=0.687, loss=65.793, backward_time=1.029, grad_norm=123.443, clip=100.000, loss_scale=2.361e+21, optim_step_time=0.183, optim0_lr0=7.484e-05, train_time=2.716 +[gpub005:0/64] 2023-07-08 02:31:58,456 (trainer:732) INFO: 24epoch:train:3201-3300batch: iter_time=1.143e-04, forward_time=0.146, loss_ctc=64.577, loss_att=50.815, acc=0.710, loss=54.944, backward_time=1.027, grad_norm=96.139, clip=100.000, loss_scale=2.361e+21, optim_step_time=0.183, optim0_lr0=7.483e-05, train_time=2.716 +[gpub005:0/64] 2023-07-08 02:32:57,662 (multiple_iter_factory:32) INFO: Building 4th iter-factory... +[gpub005:0/64] 2023-07-08 02:33:16,507 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub005:0/64] 2023-07-08 02:33:19,930 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"} + preprocess: ) +[gpub005:0/64] 2023-07-08 02:33:19,930 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7, +[gpub005:0/64] 2023-07-08 02:33:19,936 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub005:0/64] 2023-07-08 02:39:02,932 (trainer:732) INFO: 24epoch:train:3301-3400batch: iter_time=1.245, forward_time=0.157, loss_ctc=68.788, loss_att=57.316, acc=0.692, loss=60.757, backward_time=1.054, grad_norm=97.600, clip=100.000, loss_scale=2.361e+21, optim_step_time=0.184, optim0_lr0=7.481e-05, train_time=8.489 +[gpub005:0/64] 2023-07-08 02:41:19,229 (trainer:732) INFO: 24epoch:train:3401-3500batch: iter_time=1.235e-04, forward_time=0.147, loss_ctc=72.583, loss_att=54.522, acc=0.715, loss=59.940, backward_time=1.029, grad_norm=105.703, clip=100.000, loss_scale=2.361e+21, optim_step_time=0.183, optim0_lr0=7.479e-05, train_time=2.726 +[gpub005:0/64] 2023-07-08 02:43:35,703 (trainer:732) INFO: 24epoch:train:3501-3600batch: iter_time=1.213e-04, forward_time=0.146, loss_ctc=77.004, loss_att=55.591, acc=0.714, loss=62.015, backward_time=1.028, grad_norm=112.170, clip=100.000, loss_scale=2.361e+21, optim_step_time=0.183, optim0_lr0=7.478e-05, train_time=2.729 +[gpub005:0/64] 2023-07-08 02:45:51,850 (trainer:732) INFO: 24epoch:train:3601-3700batch: iter_time=1.155e-04, forward_time=0.146, loss_ctc=71.437, loss_att=55.246, acc=0.700, loss=60.103, backward_time=1.029, grad_norm=102.536, clip=100.000, loss_scale=2.361e+21, optim_step_time=0.183, optim0_lr0=7.476e-05, train_time=2.723 +[gpub005:0/64] 2023-07-08 02:48:08,047 (trainer:732) INFO: 24epoch:train:3701-3800batch: iter_time=1.184e-04, forward_time=0.146, loss_ctc=73.706, loss_att=63.883, acc=0.693, loss=66.830, backward_time=1.029, grad_norm=94.452, clip=100.000, loss_scale=2.361e+21, optim_step_time=0.183, optim0_lr0=7.474e-05, train_time=2.724 +[gpub005:0/64] 2023-07-08 02:50:24,069 (trainer:732) INFO: 24epoch:train:3801-3900batch: iter_time=1.117e-04, forward_time=0.146, 
loss_ctc=75.479, loss_att=54.094, acc=0.707, loss=60.510, backward_time=1.029, grad_norm=97.153, clip=100.000, loss_scale=2.361e+21, optim_step_time=0.183, optim0_lr0=7.473e-05, train_time=2.720 +[gpub005:0/64] 2023-07-08 02:52:39,889 (trainer:732) INFO: 24epoch:train:3901-4000batch: iter_time=1.159e-04, forward_time=0.145, loss_ctc=78.941, loss_att=58.000, acc=0.700, loss=64.282, backward_time=1.027, grad_norm=111.973, clip=100.000, loss_scale=2.361e+21, optim_step_time=0.183, optim0_lr0=7.471e-05, train_time=2.716 +[gpub005:0/64] 2023-07-08 02:54:55,730 (trainer:732) INFO: 24epoch:train:4001-4100batch: iter_time=1.273e-04, forward_time=0.146, loss_ctc=66.834, loss_att=54.123, acc=0.701, loss=57.936, backward_time=1.027, grad_norm=96.300, clip=100.000, loss_scale=4.722e+21, optim_step_time=0.183, optim0_lr0=7.469e-05, train_time=2.717 +[gpub005:0/64] 2023-07-08 02:56:27,946 (multiple_iter_factory:32) INFO: Building 5th iter-factory... +[gpub005:0/64] 2023-07-08 02:56:45,945 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub005:0/64] 2023-07-08 02:56:49,390 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"} + preprocess: ) +[gpub005:0/64] 2023-07-08 02:56:49,390 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1, +[gpub005:0/64] 2023-07-08 02:56:49,396 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub005:0/64] 2023-07-08 03:01:09,171 (trainer:732) INFO: 24epoch:train:4101-4200batch: iter_time=1.248, forward_time=0.161, loss_ctc=71.173, loss_att=57.787, acc=0.718, loss=61.802, backward_time=1.042, grad_norm=102.778, clip=100.000, loss_scale=4.722e+21, optim_step_time=0.184, optim0_lr0=7.468e-05, train_time=7.469 +[gpub005:0/64] 2023-07-08 03:03:25,586 (trainer:732) INFO: 24epoch:train:4201-4300batch: iter_time=1.201e-04, forward_time=0.146, loss_ctc=66.106, loss_att=52.017, acc=0.708, loss=56.244, backward_time=1.033, grad_norm=89.218, clip=100.000, loss_scale=4.722e+21, optim_step_time=0.183, optim0_lr0=7.466e-05, train_time=2.728 +[gpub005:0/64] 2023-07-08 03:05:41,834 (trainer:732) INFO: 24epoch:train:4301-4400batch: iter_time=1.221e-04, forward_time=0.146, loss_ctc=71.008, loss_att=49.565, acc=0.721, loss=55.998, backward_time=1.030, grad_norm=101.755, clip=100.000, loss_scale=4.722e+21, optim_step_time=0.183, optim0_lr0=7.464e-05, train_time=2.725 +[gpub005:0/64] 2023-07-08 03:07:57,995 (trainer:732) INFO: 24epoch:train:4401-4500batch: iter_time=1.142e-04, forward_time=0.147, loss_ctc=77.163, loss_att=57.856, acc=0.709, loss=63.648, backward_time=1.029, grad_norm=109.828, clip=100.000, loss_scale=4.722e+21, optim_step_time=0.183, optim0_lr0=7.463e-05, train_time=2.723 +[gpub005:0/64] 2023-07-08 03:10:13,988 (trainer:732) INFO: 24epoch:train:4501-4600batch: iter_time=1.274e-04, forward_time=0.146, loss_ctc=76.375, loss_att=64.397, acc=0.699, loss=67.990, backward_time=1.029, grad_norm=107.384, clip=100.000, loss_scale=4.722e+21, optim_step_time=0.183, optim0_lr0=7.461e-05, train_time=2.720 
+[gpub005:0/64] 2023-07-08 03:12:30,013 (trainer:732) INFO: 24epoch:train:4601-4700batch: iter_time=1.345e-04, forward_time=0.148, loss_ctc=70.244, loss_att=51.300, acc=0.711, loss=56.983, backward_time=1.029, grad_norm=100.325, clip=100.000, loss_scale=4.722e+21, optim_step_time=0.184, optim0_lr0=7.459e-05, train_time=2.720 +[gpub005:0/64] 2023-07-08 03:14:46,161 (trainer:732) INFO: 24epoch:train:4701-4800batch: iter_time=1.171e-04, forward_time=0.146, loss_ctc=77.063, loss_att=60.448, acc=0.691, loss=65.433, backward_time=1.031, grad_norm=99.011, clip=100.000, loss_scale=4.722e+21, optim_step_time=0.183, optim0_lr0=7.458e-05, train_time=2.723 +[gpub005:0/64] 2023-07-08 03:17:07,246 (trainer:732) INFO: 24epoch:train:4801-4900batch: iter_time=1.221e-04, forward_time=0.145, loss_ctc=72.573, loss_att=53.899, acc=0.709, loss=59.501, backward_time=1.033, grad_norm=110.928, clip=100.000, loss_scale=4.722e+21, optim_step_time=0.183, optim0_lr0=7.456e-05, train_time=2.821 +[gpub005:0/64] 2023-07-08 03:19:26,086 (trainer:732) INFO: 24epoch:train:4901-5000batch: iter_time=1.179e-04, forward_time=0.146, loss_ctc=71.710, loss_att=56.838, acc=0.706, loss=61.300, backward_time=1.032, grad_norm=102.143, clip=100.000, loss_scale=4.722e+21, optim_step_time=0.183, optim0_lr0=7.454e-05, train_time=2.777 +[gpub005:0/64] 2023-07-08 03:19:27,571 (multiple_iter_factory:32) INFO: Building 6th iter-factory... +[gpub005:0/64] 2023-07-08 03:19:45,853 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub005:0/64] 2023-07-08 03:19:49,250 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"} + preprocess: ) +[gpub005:0/64] 2023-07-08 03:19:49,250 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4, +[gpub005:0/64] 2023-07-08 03:19:49,256 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub005:0/64] 2023-07-08 03:25:36,249 (trainer:732) INFO: 24epoch:train:5001-5100batch: iter_time=1.261, forward_time=0.144, loss_ctc=65.391, loss_att=54.498, acc=0.700, loss=57.766, backward_time=1.048, grad_norm=94.745, clip=100.000, loss_scale=4.722e+21, optim_step_time=0.183, optim0_lr0=7.453e-05, train_time=7.403 +[gpub005:0/64] 2023-07-08 03:27:52,012 (trainer:732) INFO: 24epoch:train:5101-5200batch: iter_time=1.034e-04, forward_time=0.145, loss_ctc=72.872, loss_att=53.944, acc=0.711, loss=59.622, backward_time=1.026, grad_norm=120.048, clip=100.000, loss_scale=4.722e+21, optim_step_time=0.183, optim0_lr0=7.451e-05, train_time=2.715 +[gpub005:0/64] 2023-07-08 03:30:07,787 (trainer:732) INFO: 24epoch:train:5201-5300batch: iter_time=9.830e-05, forward_time=0.145, loss_ctc=78.638, loss_att=59.184, acc=0.712, loss=65.020, backward_time=1.027, grad_norm=92.094, clip=100.000, loss_scale=4.722e+21, optim_step_time=0.183, optim0_lr0=7.449e-05, train_time=2.715 +[gpub005:0/64] 2023-07-08 03:32:25,756 (trainer:732) INFO: 24epoch:train:5301-5400batch: iter_time=9.984e-05, forward_time=0.145, loss_ctc=74.505, loss_att=60.142, acc=0.682, loss=64.451, 
backward_time=1.044, grad_norm=103.493, clip=100.000, loss_scale=4.722e+21, optim_step_time=0.183, optim0_lr0=7.448e-05, train_time=2.759 +[gpub005:0/64] 2023-07-08 03:34:42,021 (trainer:732) INFO: 24epoch:train:5401-5500batch: iter_time=9.984e-05, forward_time=0.145, loss_ctc=71.932, loss_att=56.685, acc=0.695, loss=61.259, backward_time=1.031, grad_norm=90.686, clip=100.000, loss_scale=4.722e+21, optim_step_time=0.183, optim0_lr0=7.446e-05, train_time=2.725 +[gpub005:0/64] 2023-07-08 03:36:58,579 (trainer:732) INFO: 24epoch:train:5501-5600batch: iter_time=1.076e-04, forward_time=0.146, loss_ctc=73.486, loss_att=54.717, acc=0.701, loss=60.348, backward_time=1.032, grad_norm=106.312, clip=100.000, loss_scale=4.722e+21, optim_step_time=0.183, optim0_lr0=7.444e-05, train_time=2.731 +[gpub005:0/64] 2023-07-08 03:39:14,325 (trainer:732) INFO: 24epoch:train:5601-5700batch: iter_time=9.864e-05, forward_time=0.145, loss_ctc=77.877, loss_att=59.762, acc=0.687, loss=65.197, backward_time=1.026, grad_norm=109.617, clip=100.000, loss_scale=4.722e+21, optim_step_time=0.183, optim0_lr0=7.443e-05, train_time=2.715 +[gpub005:0/64] 2023-07-08 03:41:30,723 (trainer:732) INFO: 24epoch:train:5701-5800batch: iter_time=1.053e-04, forward_time=0.144, loss_ctc=65.162, loss_att=50.988, acc=0.716, loss=55.240, backward_time=1.032, grad_norm=98.285, clip=100.000, loss_scale=4.722e+21, optim_step_time=0.183, optim0_lr0=7.441e-05, train_time=2.728 +[gpub005:0/64] 2023-07-08 03:42:16,993 (multiple_iter_factory:32) INFO: Building 7th iter-factory... +[gpub005:0/64] 2023-07-08 03:42:34,998 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub005:0/64] 2023-07-08 03:42:38,488 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"} + preprocess: ) +[gpub005:0/64] 2023-07-08 03:42:38,488 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5, +[gpub005:0/64] 2023-07-08 03:42:38,494 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub005:0/64] 2023-07-08 03:47:54,199 (trainer:732) INFO: 24epoch:train:5801-5900batch: iter_time=1.236, forward_time=0.154, loss_ctc=69.112, loss_att=56.695, acc=0.701, loss=60.420, backward_time=1.042, grad_norm=120.681, clip=100.000, loss_scale=4.722e+21, optim_step_time=0.183, optim0_lr0=7.439e-05, train_time=7.669 +[gpub005:0/64] 2023-07-08 03:50:22,625 (trainer:732) INFO: 24epoch:train:5901-6000batch: iter_time=1.186e-04, forward_time=0.144, loss_ctc=71.865, loss_att=55.398, acc=0.711, loss=60.338, backward_time=1.043, grad_norm=110.200, clip=100.000, loss_scale=4.722e+21, optim_step_time=0.183, optim0_lr0=7.438e-05, train_time=2.968 +[gpub005:0/64] 2023-07-08 03:52:49,847 (trainer:732) INFO: 24epoch:train:6001-6100batch: iter_time=1.171e-04, forward_time=0.145, loss_ctc=76.266, loss_att=55.052, acc=0.717, loss=61.416, backward_time=1.051, grad_norm=98.498, clip=100.000, loss_scale=4.722e+21, optim_step_time=0.183, optim0_lr0=7.436e-05, train_time=2.944 +[gpub005:0/64] 2023-07-08 03:55:19,854 (trainer:732) INFO: 
24epoch:train:6101-6200batch: iter_time=1.249e-04, forward_time=0.156, loss_ctc=70.751, loss_att=53.614, acc=0.707, loss=58.755, backward_time=1.049, grad_norm=103.884, clip=100.000, loss_scale=4.722e+21, optim_step_time=0.183, optim0_lr0=7.435e-05, train_time=3.000 +[gpub005:0/64] 2023-07-08 03:57:49,296 (trainer:732) INFO: 24epoch:train:6201-6300batch: iter_time=1.229e-04, forward_time=0.148, loss_ctc=76.058, loss_att=64.659, acc=0.695, loss=68.078, backward_time=1.065, grad_norm=100.653, clip=100.000, loss_scale=4.722e+21, optim_step_time=0.184, optim0_lr0=7.433e-05, train_time=2.989 +[gpub005:0/64] 2023-07-08 04:00:09,479 (trainer:732) INFO: 24epoch:train:6301-6400batch: iter_time=1.223e-04, forward_time=0.145, loss_ctc=74.958, loss_att=54.669, acc=0.705, loss=60.756, backward_time=1.031, grad_norm=94.045, clip=100.000, loss_scale=4.722e+21, optim_step_time=0.183, optim0_lr0=7.431e-05, train_time=2.803 +[gpub005:0/64] 2023-07-08 04:02:32,143 (trainer:732) INFO: 24epoch:train:6401-6500batch: iter_time=1.264e-04, forward_time=0.145, loss_ctc=78.123, loss_att=58.724, acc=0.701, loss=64.544, backward_time=1.040, grad_norm=109.338, clip=100.000, loss_scale=4.722e+21, optim_step_time=0.183, optim0_lr0=7.430e-05, train_time=2.853 +[gpub005:0/64] 2023-07-08 04:04:51,610 (trainer:732) INFO: 24epoch:train:6501-6600batch: iter_time=1.316e-04, forward_time=0.145, loss_ctc=66.708, loss_att=54.417, acc=0.704, loss=58.104, backward_time=1.032, grad_norm=102.988, clip=100.000, loss_scale=4.722e+21, optim_step_time=0.183, optim0_lr0=7.428e-05, train_time=2.789 +[gpub005:0/64] 2023-07-08 04:06:26,443 (multiple_iter_factory:32) INFO: Building 8th iter-factory... +[gpub005:0/64] 2023-07-08 04:06:44,558 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub005:0/64] 2023-07-08 04:06:47,998 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"} + preprocess: ) +[gpub005:0/64] 2023-07-08 04:06:47,998 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6, +[gpub005:0/64] 2023-07-08 04:06:48,004 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub005:0/64] 2023-07-08 04:09:57,980 (trainer:732) INFO: 24epoch:train:6601-6700batch: iter_time=1.411, forward_time=0.145, loss_ctc=70.865, loss_att=57.768, acc=0.718, loss=61.697, backward_time=1.040, grad_norm=96.931, clip=100.000, loss_scale=4.722e+21, optim_step_time=0.183, optim0_lr0=7.426e-05, train_time=6.127 +[gpub005:0/64] 2023-07-08 04:12:15,222 (trainer:732) INFO: 24epoch:train:6701-6800batch: iter_time=1.273e-04, forward_time=0.145, loss_ctc=67.487, loss_att=52.036, acc=0.707, loss=56.671, backward_time=1.034, grad_norm=100.553, clip=100.000, loss_scale=4.722e+21, optim_step_time=0.183, optim0_lr0=7.425e-05, train_time=2.745 +[gpub005:0/64] 2023-07-08 04:14:30,629 (trainer:732) INFO: 24epoch:train:6801-6900batch: iter_time=1.276e-04, forward_time=0.144, loss_ctc=71.361, loss_att=50.340, acc=0.711, loss=56.646, backward_time=1.025, grad_norm=111.542, clip=100.000, loss_scale=4.722e+21, 
optim_step_time=0.183, optim0_lr0=7.423e-05, train_time=2.708 +[gpub005:0/64] 2023-07-08 04:16:46,581 (trainer:732) INFO: 24epoch:train:6901-7000batch: iter_time=1.255e-04, forward_time=0.145, loss_ctc=77.871, loss_att=57.029, acc=0.711, loss=63.282, backward_time=1.029, grad_norm=107.653, clip=100.000, loss_scale=4.722e+21, optim_step_time=0.183, optim0_lr0=7.421e-05, train_time=2.719 +[gpub005:0/64] 2023-07-08 04:19:02,671 (trainer:732) INFO: 24epoch:train:7001-7100batch: iter_time=1.217e-04, forward_time=0.146, loss_ctc=76.050, loss_att=64.346, acc=0.697, loss=67.857, backward_time=1.029, grad_norm=91.851, clip=100.000, loss_scale=4.722e+21, optim_step_time=0.183, optim0_lr0=7.420e-05, train_time=2.722 +[gpub005:0/64] 2023-07-08 04:21:18,512 (trainer:732) INFO: 24epoch:train:7101-7200batch: iter_time=1.030e-04, forward_time=0.145, loss_ctc=70.235, loss_att=51.323, acc=0.707, loss=56.997, backward_time=1.029, grad_norm=95.632, clip=100.000, loss_scale=4.722e+21, optim_step_time=0.183, optim0_lr0=7.418e-05, train_time=2.717 +[gpub005:0/64] 2023-07-08 04:23:34,926 (trainer:732) INFO: 24epoch:train:7201-7300batch: iter_time=1.009e-04, forward_time=0.146, loss_ctc=74.110, loss_att=59.073, acc=0.684, loss=63.584, backward_time=1.033, grad_norm=116.090, clip=100.000, loss_scale=4.722e+21, optim_step_time=0.183, optim0_lr0=7.417e-05, train_time=2.728 +[gpub005:0/64] 2023-07-08 04:25:50,893 (trainer:732) INFO: 24epoch:train:7301-7400batch: iter_time=1.053e-04, forward_time=0.145, loss_ctc=71.461, loss_att=54.905, acc=0.707, loss=59.872, backward_time=1.028, grad_norm=89.587, clip=100.000, loss_scale=4.722e+21, optim_step_time=0.183, optim0_lr0=7.415e-05, train_time=2.719 +[gpub005:0/64] 2023-07-08 04:28:06,592 (multiple_iter_factory:32) INFO: Building 9th iter-factory... 
+[gpub005:0/64] 2023-07-08 04:28:24,838 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub005:0/64] 2023-07-08 04:28:28,253 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"} + preprocess: ) +[gpub005:0/64] 2023-07-08 04:28:28,253 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10, +[gpub005:0/64] 2023-07-08 04:28:28,259 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub005:0/64] 2023-07-08 04:31:58,721 (trainer:732) INFO: 24epoch:train:7401-7500batch: iter_time=1.248, forward_time=0.157, loss_ctc=70.151, loss_att=54.443, acc=0.701, loss=59.156, backward_time=1.035, grad_norm=102.377, clip=100.000, loss_scale=4.722e+21, optim_step_time=0.183, optim0_lr0=7.413e-05, train_time=7.356 +[gpub005:0/64] 2023-07-08 04:34:16,951 (trainer:732) INFO: 24epoch:train:7501-7600batch: iter_time=1.136e-04, forward_time=0.146, loss_ctc=64.218, loss_att=51.088, acc=0.702, loss=55.027, backward_time=1.035, grad_norm=94.349, clip=100.000, loss_scale=4.722e+21, optim_step_time=0.183, optim0_lr0=7.412e-05, train_time=2.765 +[gpub005:0/64] 2023-07-08 04:36:33,188 (trainer:732) INFO: 24epoch:train:7601-7700batch: iter_time=1.163e-04, forward_time=0.145, loss_ctc=72.839, loss_att=52.242, acc=0.715, loss=58.421, backward_time=1.027, grad_norm=100.137, clip=100.000, loss_scale=4.722e+21, optim_step_time=0.183, optim0_lr0=7.410e-05, train_time=2.725 +[gpub005:0/64] 2023-07-08 04:38:49,029 (trainer:732) INFO: 24epoch:train:7701-7800batch: iter_time=1.131e-04, forward_time=0.146, loss_ctc=77.897, loss_att=56.319, acc=0.710, loss=62.792, backward_time=1.029, grad_norm=96.600, clip=100.000, loss_scale=4.722e+21, optim_step_time=0.183, optim0_lr0=7.408e-05, train_time=2.717 +[gpub005:0/64] 2023-07-08 04:41:04,866 (trainer:732) INFO: 24epoch:train:7801-7900batch: iter_time=1.253e-04, forward_time=0.145, loss_ctc=75.016, loss_att=64.152, acc=0.690, loss=67.411, backward_time=1.027, grad_norm=99.392, clip=100.000, loss_scale=4.722e+21, optim_step_time=0.183, optim0_lr0=7.407e-05, train_time=2.717 +[gpub005:0/64] 2023-07-08 04:43:21,052 (trainer:732) INFO: 24epoch:train:7901-8000batch: iter_time=1.166e-04, forward_time=0.146, loss_ctc=69.861, loss_att=50.596, acc=0.706, loss=56.375, backward_time=1.031, grad_norm=85.637, clip=100.000, loss_scale=4.722e+21, optim_step_time=0.183, optim0_lr0=7.405e-05, train_time=2.723 +[gpub005:0/64] 2023-07-08 04:45:36,903 (trainer:732) INFO: 24epoch:train:8001-8100batch: iter_time=1.105e-04, forward_time=0.146, loss_ctc=75.733, loss_att=59.098, acc=0.684, loss=64.089, backward_time=1.028, grad_norm=96.265, clip=100.000, loss_scale=9.445e+21, optim_step_time=0.183, optim0_lr0=7.404e-05, train_time=2.717 +[gpub005:0/64] 2023-07-08 04:47:52,361 (trainer:732) INFO: 24epoch:train:8101-8200batch: iter_time=1.127e-04, forward_time=0.145, loss_ctc=71.279, loss_att=55.254, acc=0.703, loss=60.061, backward_time=1.025, grad_norm=104.118, clip=100.000, loss_scale=9.445e+21, optim_step_time=0.183, 
optim0_lr0=7.402e-05, train_time=2.709 +[gpub005:0/64] 2023-07-08 04:50:08,165 (trainer:732) INFO: 24epoch:train:8201-8300batch: iter_time=1.132e-04, forward_time=0.145, loss_ctc=66.791, loss_att=54.537, acc=0.708, loss=58.213, backward_time=1.027, grad_norm=107.484, clip=100.000, loss_scale=9.445e+21, optim_step_time=0.183, optim0_lr0=7.400e-05, train_time=2.716 +[gpub005:0/64] 2023-07-08 04:51:06,107 (multiple_iter_factory:32) INFO: Building 10th iter-factory... +[gpub005:0/64] 2023-07-08 04:51:24,031 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub005:0/64] 2023-07-08 04:51:27,758 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"} + preprocess: ) +[gpub005:0/64] 2023-07-08 04:51:27,758 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11, +[gpub005:0/64] 2023-07-08 04:51:27,764 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub005:0/64] 2023-07-08 04:56:22,745 (trainer:732) INFO: 24epoch:train:8301-8400batch: iter_time=2.278, forward_time=0.194, loss_ctc=63.795, loss_att=46.168, acc=0.707, loss=51.456, backward_time=1.048, grad_norm=100.525, clip=100.000, loss_scale=9.445e+21, optim_step_time=0.185, optim0_lr0=7.399e-05, train_time=7.491 +[gpub005:0/64] 2023-07-08 04:58:39,347 (trainer:732) INFO: 24epoch:train:8401-8500batch: iter_time=1.108e-04, forward_time=0.146, loss_ctc=70.673, loss_att=54.531, acc=0.718, loss=59.373, backward_time=1.027, grad_norm=101.049, clip=100.000, loss_scale=9.445e+21, optim_step_time=0.183, optim0_lr0=7.397e-05, train_time=2.732 +[gpub005:0/64] 2023-07-08 05:00:56,190 (trainer:732) INFO: 24epoch:train:8501-8600batch: iter_time=1.362e-04, forward_time=0.146, loss_ctc=81.170, loss_att=59.636, acc=0.713, loss=66.096, backward_time=1.033, grad_norm=102.757, clip=100.000, loss_scale=9.445e+21, optim_step_time=0.183, optim0_lr0=7.395e-05, train_time=2.737 +[gpub005:0/64] 2023-07-08 05:03:12,609 (trainer:732) INFO: 24epoch:train:8601-8700batch: iter_time=1.274e-04, forward_time=0.147, loss_ctc=70.543, loss_att=53.905, acc=0.698, loss=58.897, backward_time=1.033, grad_norm=98.360, clip=100.000, loss_scale=9.445e+21, optim_step_time=0.183, optim0_lr0=7.394e-05, train_time=2.728 +[gpub005:0/64] 2023-07-08 05:05:28,730 (trainer:732) INFO: 24epoch:train:8701-8800batch: iter_time=1.292e-04, forward_time=0.147, loss_ctc=69.057, loss_att=58.090, acc=0.709, loss=61.380, backward_time=1.030, grad_norm=125.170, clip=100.000, loss_scale=9.445e+21, optim_step_time=0.183, optim0_lr0=7.392e-05, train_time=2.722 +[gpub005:0/64] 2023-07-08 05:07:44,575 (trainer:732) INFO: 24epoch:train:8801-8900batch: iter_time=1.295e-04, forward_time=0.145, loss_ctc=77.481, loss_att=55.840, acc=0.710, loss=62.332, backward_time=1.029, grad_norm=96.606, clip=100.000, loss_scale=9.445e+21, optim_step_time=0.183, optim0_lr0=7.391e-05, train_time=2.717 +[gpub005:0/64] 2023-07-08 05:10:00,543 (trainer:732) INFO: 24epoch:train:8901-9000batch: iter_time=1.249e-04, forward_time=0.146, loss_ctc=77.860, 
loss_att=60.412, acc=0.702, loss=65.647, backward_time=1.029, grad_norm=107.480, clip=100.000, loss_scale=9.445e+21, optim_step_time=0.183, optim0_lr0=7.389e-05, train_time=2.719 +[gpub005:0/64] 2023-07-08 05:12:16,943 (trainer:732) INFO: 24epoch:train:9001-9100batch: iter_time=1.265e-04, forward_time=0.147, loss_ctc=64.228, loss_att=51.265, acc=0.713, loss=55.154, backward_time=1.032, grad_norm=103.336, clip=100.000, loss_scale=9.445e+21, optim_step_time=0.183, optim0_lr0=7.387e-05, train_time=2.728 +[gpub005:0/64] 2023-07-08 05:13:50,730 (multiple_iter_factory:32) INFO: Building 11th iter-factory... +[gpub005:0/64] 2023-07-08 05:14:09,041 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub005:0/64] 2023-07-08 05:14:12,445 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"} + preprocess: ) +[gpub005:0/64] 2023-07-08 05:14:12,446 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3, +[gpub005:0/64] 2023-07-08 05:14:12,452 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub005:0/64] 2023-07-08 05:18:16,694 (trainer:732) INFO: 24epoch:train:9101-9200batch: iter_time=1.278, forward_time=0.147, loss_ctc=69.239, loss_att=55.541, acc=0.700, loss=59.650, backward_time=1.040, grad_norm=102.258, clip=100.000, loss_scale=9.445e+21, optim_step_time=0.183, optim0_lr0=7.386e-05, train_time=7.195 +[gpub005:0/64] 2023-07-08 05:20:33,672 (trainer:732) INFO: 24epoch:train:9201-9300batch: iter_time=1.200e-04, forward_time=0.147, loss_ctc=65.579, loss_att=50.029, acc=0.720, loss=54.694, backward_time=1.032, grad_norm=84.538, clip=100.000, loss_scale=9.445e+21, optim_step_time=0.183, optim0_lr0=7.384e-05, train_time=2.739 +[gpub005:0/64] 2023-07-08 05:22:50,622 (trainer:732) INFO: 24epoch:train:9301-9400batch: iter_time=1.139e-04, forward_time=0.146, loss_ctc=70.083, loss_att=49.325, acc=0.718, loss=55.553, backward_time=1.030, grad_norm=91.235, clip=100.000, loss_scale=9.445e+21, optim_step_time=0.183, optim0_lr0=7.382e-05, train_time=2.739 +[gpub005:0/64] 2023-07-08 05:25:06,734 (trainer:732) INFO: 24epoch:train:9401-9500batch: iter_time=1.142e-04, forward_time=0.146, loss_ctc=77.209, loss_att=57.645, acc=0.710, loss=63.514, backward_time=1.026, grad_norm=95.607, clip=100.000, loss_scale=9.445e+21, optim_step_time=0.182, optim0_lr0=7.381e-05, train_time=2.722 +[gpub005:0/64] 2023-07-08 05:27:22,855 (trainer:732) INFO: 24epoch:train:9501-9600batch: iter_time=1.148e-04, forward_time=0.146, loss_ctc=75.606, loss_att=64.748, acc=0.703, loss=68.006, backward_time=1.030, grad_norm=94.541, clip=100.000, loss_scale=9.445e+21, optim_step_time=0.183, optim0_lr0=7.379e-05, train_time=2.722 +[gpub005:0/64] 2023-07-08 05:29:38,842 (trainer:732) INFO: 24epoch:train:9601-9700batch: iter_time=1.074e-04, forward_time=0.147, loss_ctc=70.548, loss_att=50.666, acc=0.716, loss=56.631, backward_time=1.030, grad_norm=106.879, clip=100.000, loss_scale=9.445e+21, optim_step_time=0.183, optim0_lr0=7.378e-05, train_time=2.720 +[gpub005:0/64] 2023-07-08 
05:31:55,248 (trainer:732) INFO: 24epoch:train:9701-9800batch: iter_time=1.109e-04, forward_time=0.146, loss_ctc=76.421, loss_att=59.658, acc=0.695, loss=64.687, backward_time=1.032, grad_norm=108.855, clip=100.000, loss_scale=9.445e+21, optim_step_time=0.183, optim0_lr0=7.376e-05, train_time=2.728 +[gpub005:0/64] 2023-07-08 05:34:11,300 (trainer:732) INFO: 24epoch:train:9801-9900batch: iter_time=1.108e-04, forward_time=0.145, loss_ctc=69.905, loss_att=51.680, acc=0.718, loss=57.148, backward_time=1.032, grad_norm=97.751, clip=100.000, loss_scale=9.445e+21, optim_step_time=0.183, optim0_lr0=7.374e-05, train_time=2.721 +[gpub005:0/64] 2023-07-08 05:36:27,320 (trainer:732) INFO: 24epoch:train:9901-10000batch: iter_time=1.145e-04, forward_time=0.146, loss_ctc=71.831, loss_att=57.034, acc=0.706, loss=61.473, backward_time=1.031, grad_norm=88.956, clip=100.000, loss_scale=9.445e+21, optim_step_time=0.183, optim0_lr0=7.373e-05, train_time=2.720 +[gpub005:0/64] 2023-07-08 05:49:37,783 (trainer:338) INFO: 24epoch results: [train] iter_time=0.180, forward_time=0.148, loss_ctc=72.844, loss_att=56.139, acc=0.703, loss=61.150, backward_time=1.032, grad_norm=101.161, clip=100.000, loss_scale=4.722e+21, optim_step_time=0.183, optim0_lr0=7.454e-05, train_time=3.286, time=4 hours, 34 minutes and 8.11 seconds, total_count=210000, gpu_max_cached_mem_GB=38.229, [valid] loss_ctc=49.172, cer_ctc=0.283, loss_att=39.899, acc=0.671, cer=0.378, wer=0.988, loss=42.681, time=6 minutes and 54.41 seconds, total_count=21758, gpu_max_cached_mem_GB=38.229, [att_plot] time=5 minutes and 56.93 seconds, total_count=0, gpu_max_cached_mem_GB=38.229 +[gpub005:0/64] 2023-07-08 05:49:53,063 (trainer:386) INFO: The best model has been updated: valid.total_count +[gpub005:0/64] 2023-07-08 05:49:53,184 (trainer:440) INFO: The model files were removed: exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/17epoch.pth, exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/19epoch.pth +[gpub005:0/64] 2023-07-08 05:49:53,184 (trainer:272) INFO: 25/30epoch started. Estimated time to finish: 1 day, 5 hours and 1 minute +[gpub005:0/64] 2023-07-08 05:49:53,188 (multiple_iter_factory:32) INFO: Building 0th iter-factory... 
+[gpub005:0/64] 2023-07-08 05:50:11,340 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub005:0/64] 2023-07-08 05:50:15,036 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"} + preprocess: ) +[gpub005:0/64] 2023-07-08 05:50:15,036 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4, +[gpub005:0/64] 2023-07-08 05:50:15,043 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub005:0/64] 2023-07-08 05:54:20,277 (trainer:732) INFO: 25epoch:train:1-100batch: iter_time=1.258, forward_time=0.155, loss_ctc=67.394, loss_att=55.790, acc=0.669, loss=59.271, backward_time=1.041, grad_norm=94.745, clip=100.000, loss_scale=9.445e+21, optim_step_time=0.184, optim0_lr0=7.371e-05, train_time=5.341 +[gpub005:0/64] 2023-07-08 05:56:37,742 (trainer:732) INFO: 25epoch:train:101-200batch: iter_time=1.387e-04, forward_time=0.146, loss_ctc=84.308, loss_att=59.589, acc=0.684, loss=67.005, backward_time=1.031, grad_norm=111.641, clip=100.000, loss_scale=9.445e+21, optim_step_time=0.183, optim0_lr0=7.370e-05, train_time=2.749 +[gpub005:0/64] 2023-07-08 05:59:04,212 (trainer:732) INFO: 25epoch:train:201-300batch: iter_time=1.391e-04, forward_time=0.147, loss_ctc=78.994, loss_att=61.853, acc=0.673, loss=66.996, backward_time=1.039, grad_norm=104.903, clip=100.000, loss_scale=9.445e+21, optim_step_time=0.183, optim0_lr0=7.368e-05, train_time=2.929 +[gpub005:0/64] 2023-07-08 06:01:23,041 (trainer:732) INFO: 25epoch:train:301-400batch: iter_time=1.347e-04, forward_time=0.151, loss_ctc=70.404, loss_att=51.376, acc=0.693, loss=57.084, backward_time=1.027, grad_norm=102.277, clip=100.000, loss_scale=9.445e+21, optim_step_time=0.183, optim0_lr0=7.366e-05, train_time=2.776 +[gpub005:0/64] 2023-07-08 06:03:45,683 (trainer:732) INFO: 25epoch:train:401-500batch: iter_time=1.110e-04, forward_time=0.154, loss_ctc=71.110, loss_att=53.943, acc=0.687, loss=59.093, backward_time=1.036, grad_norm=110.916, clip=100.000, loss_scale=9.445e+21, optim_step_time=0.184, optim0_lr0=7.365e-05, train_time=2.853 +[gpub005:0/64] 2023-07-08 06:06:01,868 (trainer:732) INFO: 25epoch:train:501-600batch: iter_time=1.154e-04, forward_time=0.145, loss_ctc=76.464, loss_att=60.641, acc=0.668, loss=65.388, backward_time=1.027, grad_norm=119.647, clip=100.000, loss_scale=9.445e+21, optim_step_time=0.184, optim0_lr0=7.363e-05, train_time=2.723 +[gpub005:0/64] 2023-07-08 06:08:24,461 (trainer:732) INFO: 25epoch:train:601-700batch: iter_time=1.100e-04, forward_time=0.160, loss_ctc=73.685, loss_att=54.373, acc=0.707, loss=60.166, backward_time=1.038, grad_norm=117.803, clip=100.000, loss_scale=9.445e+21, optim_step_time=0.183, optim0_lr0=7.362e-05, train_time=2.851 +[gpub005:0/64] 2023-07-08 06:10:47,886 (trainer:732) INFO: 25epoch:train:701-800batch: iter_time=1.234e-04, forward_time=0.153, loss_ctc=73.438, loss_att=57.055, acc=0.681, loss=61.970, backward_time=1.037, grad_norm=102.935, clip=100.000, loss_scale=9.445e+21, optim_step_time=0.183, optim0_lr0=7.360e-05, 
train_time=2.869 +[gpub005:0/64] 2023-07-08 06:11:41,403 (multiple_iter_factory:32) INFO: Building 1th iter-factory... +[gpub005:0/64] 2023-07-08 06:11:59,269 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub005:0/64] 2023-07-08 06:12:02,898 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"} + preprocess: ) +[gpub005:0/64] 2023-07-08 06:12:02,898 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10, +[gpub005:0/64] 2023-07-08 06:12:02,905 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub005:0/64] 2023-07-08 06:16:41,012 (trainer:732) INFO: 25epoch:train:801-900batch: iter_time=1.458, forward_time=0.191, loss_ctc=67.076, loss_att=57.674, acc=0.679, loss=60.495, backward_time=1.042, grad_norm=97.265, clip=100.000, loss_scale=9.445e+21, optim_step_time=0.185, optim0_lr0=7.358e-05, train_time=7.062 +[gpub005:0/64] 2023-07-08 06:18:59,652 (trainer:732) INFO: 25epoch:train:901-1000batch: iter_time=1.225e-04, forward_time=0.147, loss_ctc=80.822, loss_att=56.630, acc=0.693, loss=63.887, backward_time=1.033, grad_norm=105.567, clip=100.000, loss_scale=9.445e+21, optim_step_time=0.183, optim0_lr0=7.357e-05, train_time=2.773 +[gpub005:0/64] 2023-07-08 06:21:15,828 (trainer:732) INFO: 25epoch:train:1001-1100batch: iter_time=1.175e-04, forward_time=0.147, loss_ctc=80.627, loss_att=60.328, acc=0.686, loss=66.417, backward_time=1.029, grad_norm=110.847, clip=100.000, loss_scale=9.445e+21, optim_step_time=0.183, optim0_lr0=7.355e-05, train_time=2.723 +[gpub005:0/64] 2023-07-08 06:23:34,704 (trainer:732) INFO: 25epoch:train:1101-1200batch: iter_time=1.277e-04, forward_time=0.147, loss_ctc=68.462, loss_att=49.783, acc=0.699, loss=55.387, backward_time=1.038, grad_norm=96.138, clip=100.000, loss_scale=9.445e+21, optim_step_time=0.183, optim0_lr0=7.354e-05, train_time=2.777 +[gpub005:0/64] 2023-07-08 06:25:51,451 (trainer:732) INFO: 25epoch:train:1201-1300batch: iter_time=1.223e-04, forward_time=0.147, loss_ctc=68.844, loss_att=51.168, acc=0.687, loss=56.471, backward_time=1.030, grad_norm=91.064, clip=100.000, loss_scale=9.445e+21, optim_step_time=0.183, optim0_lr0=7.352e-05, train_time=2.735 +[gpub005:0/64] 2023-07-08 06:28:09,593 (trainer:732) INFO: 25epoch:train:1301-1400batch: iter_time=1.119e-04, forward_time=0.147, loss_ctc=77.293, loss_att=63.126, acc=0.669, loss=67.376, backward_time=1.029, grad_norm=94.136, clip=100.000, loss_scale=9.445e+21, optim_step_time=0.183, optim0_lr0=7.351e-05, train_time=2.763 +[gpub005:0/64] 2023-07-08 06:30:27,572 (trainer:732) INFO: 25epoch:train:1401-1500batch: iter_time=1.204e-04, forward_time=0.146, loss_ctc=72.320, loss_att=52.628, acc=0.702, loss=58.535, backward_time=1.031, grad_norm=107.375, clip=100.000, loss_scale=9.445e+21, optim_step_time=0.183, optim0_lr0=7.349e-05, train_time=2.759 +[gpub005:0/64] 2023-07-08 06:32:43,510 (trainer:732) INFO: 25epoch:train:1501-1600batch: iter_time=1.230e-04, forward_time=0.146, loss_ctc=72.172, loss_att=55.048, acc=0.694, 
loss=60.185, backward_time=1.030, grad_norm=114.079, clip=100.000, loss_scale=9.445e+21, optim_step_time=0.183, optim0_lr0=7.347e-05, train_time=2.719 +[gpub005:0/64] 2023-07-08 06:34:30,871 (multiple_iter_factory:32) INFO: Building 2th iter-factory... +[gpub005:0/64] 2023-07-08 06:34:49,193 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub005:0/64] 2023-07-08 06:34:52,675 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"} + preprocess: ) +[gpub005:0/64] 2023-07-08 06:34:52,675 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3, +[gpub005:0/64] 2023-07-08 06:34:52,681 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub005:0/64] 2023-07-08 06:39:03,625 (trainer:732) INFO: 25epoch:train:1601-1700batch: iter_time=2.312, forward_time=0.145, loss_ctc=62.890, loss_att=49.929, acc=0.685, loss=53.817, backward_time=1.041, grad_norm=91.507, clip=100.000, loss_scale=9.445e+21, optim_step_time=0.184, optim0_lr0=7.346e-05, train_time=7.602 +[gpub005:0/64] 2023-07-08 06:41:20,097 (trainer:732) INFO: 25epoch:train:1701-1800batch: iter_time=9.406e-05, forward_time=0.144, loss_ctc=73.172, loss_att=63.151, acc=0.679, loss=66.157, backward_time=1.032, grad_norm=103.748, clip=100.000, loss_scale=9.445e+21, optim_step_time=0.183, optim0_lr0=7.344e-05, train_time=2.729 +[gpub005:0/64] 2023-07-08 06:43:36,333 (trainer:732) INFO: 25epoch:train:1801-1900batch: iter_time=9.608e-05, forward_time=0.144, loss_ctc=77.961, loss_att=56.059, acc=0.692, loss=62.629, backward_time=1.030, grad_norm=116.681, clip=100.000, loss_scale=9.445e+21, optim_step_time=0.183, optim0_lr0=7.343e-05, train_time=2.724 +[gpub005:0/64] 2023-07-08 06:45:52,234 (trainer:732) INFO: 25epoch:train:1901-2000batch: iter_time=1.022e-04, forward_time=0.144, loss_ctc=76.589, loss_att=57.774, acc=0.696, loss=63.419, backward_time=1.028, grad_norm=111.423, clip=100.000, loss_scale=9.445e+21, optim_step_time=0.183, optim0_lr0=7.341e-05, train_time=2.718 +[gpub005:0/64] 2023-07-08 06:48:07,914 (trainer:732) INFO: 25epoch:train:2001-2100batch: iter_time=1.006e-04, forward_time=0.145, loss_ctc=70.293, loss_att=53.244, acc=0.693, loss=58.359, backward_time=1.027, grad_norm=96.267, clip=100.000, loss_scale=1.889e+22, optim_step_time=0.183, optim0_lr0=7.339e-05, train_time=2.713 +[gpub005:0/64] 2023-07-08 06:50:23,824 (trainer:732) INFO: 25epoch:train:2101-2200batch: iter_time=1.005e-04, forward_time=0.145, loss_ctc=67.023, loss_att=52.896, acc=0.693, loss=57.134, backward_time=1.028, grad_norm=92.993, clip=100.000, loss_scale=1.889e+22, optim_step_time=0.183, optim0_lr0=7.338e-05, train_time=2.718 +[gpub005:0/64] 2023-07-08 06:52:40,143 (trainer:732) INFO: 25epoch:train:2201-2300batch: iter_time=1.062e-04, forward_time=0.145, loss_ctc=76.121, loss_att=61.259, acc=0.706, loss=65.718, backward_time=1.031, grad_norm=108.003, clip=100.000, loss_scale=1.889e+22, optim_step_time=0.183, optim0_lr0=7.336e-05, train_time=2.726 +[gpub005:0/64] 2023-07-08 06:54:56,170 (trainer:732) 
INFO: 25epoch:train:2301-2400batch: iter_time=1.002e-04, forward_time=0.145, loss_ctc=76.319, loss_att=58.518, acc=0.696, loss=63.859, backward_time=1.030, grad_norm=138.170, clip=100.000, loss_scale=1.889e+22, optim_step_time=0.183, optim0_lr0=7.335e-05, train_time=2.720 +[gpub005:0/64] 2023-07-08 06:57:11,886 (trainer:732) INFO: 25epoch:train:2401-2500batch: iter_time=1.036e-04, forward_time=0.145, loss_ctc=66.720, loss_att=49.422, acc=0.706, loss=54.612, backward_time=1.027, grad_norm=88.293, clip=100.000, loss_scale=1.889e+22, optim_step_time=0.183, optim0_lr0=7.333e-05, train_time=2.714 +[gpub005:0/64] 2023-07-08 06:57:13,253 (multiple_iter_factory:32) INFO: Building 3th iter-factory... +[gpub005:0/64] 2023-07-08 06:57:31,639 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub005:0/64] 2023-07-08 06:57:35,098 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"} + preprocess: ) +[gpub005:0/64] 2023-07-08 06:57:35,098 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9, +[gpub005:0/64] 2023-07-08 06:57:35,104 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub005:0/64] 2023-07-08 07:03:55,517 (trainer:732) INFO: 25epoch:train:2501-2600batch: iter_time=1.269, forward_time=0.168, loss_ctc=71.680, loss_att=61.817, acc=0.673, loss=64.776, backward_time=1.045, grad_norm=123.962, clip=100.000, loss_scale=1.889e+22, optim_step_time=0.184, optim0_lr0=7.332e-05, train_time=8.072 +[gpub005:0/64] 2023-07-08 07:06:11,651 (trainer:732) INFO: 25epoch:train:2601-2700batch: iter_time=1.030e-04, forward_time=0.145, loss_ctc=78.835, loss_att=55.917, acc=0.698, loss=62.792, backward_time=1.028, grad_norm=111.638, clip=100.000, loss_scale=1.889e+22, optim_step_time=0.183, optim0_lr0=7.330e-05, train_time=2.722 +[gpub005:0/64] 2023-07-08 07:08:27,988 (trainer:732) INFO: 25epoch:train:2701-2800batch: iter_time=1.061e-04, forward_time=0.145, loss_ctc=74.238, loss_att=55.257, acc=0.702, loss=60.951, backward_time=1.029, grad_norm=93.733, clip=100.000, loss_scale=1.889e+22, optim_step_time=0.183, optim0_lr0=7.328e-05, train_time=2.727 +[gpub005:0/64] 2023-07-08 07:10:43,669 (trainer:732) INFO: 25epoch:train:2801-2900batch: iter_time=1.025e-04, forward_time=0.145, loss_ctc=69.537, loss_att=53.230, acc=0.695, loss=58.122, backward_time=1.026, grad_norm=92.779, clip=100.000, loss_scale=1.889e+22, optim_step_time=0.184, optim0_lr0=7.327e-05, train_time=2.713 +[gpub005:0/64] 2023-07-08 07:12:59,646 (trainer:732) INFO: 25epoch:train:2901-3000batch: iter_time=1.050e-04, forward_time=0.145, loss_ctc=69.671, loss_att=54.659, acc=0.687, loss=59.162, backward_time=1.030, grad_norm=102.190, clip=100.000, loss_scale=1.889e+22, optim_step_time=0.183, optim0_lr0=7.325e-05, train_time=2.719 +[gpub005:0/64] 2023-07-08 07:15:16,087 (trainer:732) INFO: 25epoch:train:3001-3100batch: iter_time=1.036e-04, forward_time=0.146, loss_ctc=73.047, loss_att=57.793, acc=0.706, loss=62.369, backward_time=1.032, grad_norm=97.007, clip=100.000, 
loss_scale=1.889e+22, optim_step_time=0.183, optim0_lr0=7.324e-05, train_time=2.729 +[gpub005:0/64] 2023-07-08 07:17:32,402 (trainer:732) INFO: 25epoch:train:3101-3200batch: iter_time=9.844e-05, forward_time=0.145, loss_ctc=75.818, loss_att=59.026, acc=0.709, loss=64.063, backward_time=1.031, grad_norm=99.614, clip=100.000, loss_scale=1.889e+22, optim_step_time=0.183, optim0_lr0=7.322e-05, train_time=2.726 +[gpub005:0/64] 2023-07-08 07:19:48,026 (trainer:732) INFO: 25epoch:train:3201-3300batch: iter_time=1.126e-04, forward_time=0.145, loss_ctc=65.520, loss_att=44.135, acc=0.710, loss=50.551, backward_time=1.027, grad_norm=101.011, clip=100.000, loss_scale=1.889e+22, optim_step_time=0.183, optim0_lr0=7.321e-05, train_time=2.712 +[gpub005:0/64] 2023-07-08 07:20:36,040 (multiple_iter_factory:32) INFO: Building 4th iter-factory... +[gpub005:0/64] 2023-07-08 07:20:54,437 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub005:0/64] 2023-07-08 07:20:57,907 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"} + preprocess: ) +[gpub005:0/64] 2023-07-08 07:20:57,907 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0, +[gpub005:0/64] 2023-07-08 07:20:57,913 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub005:0/64] 2023-07-08 07:27:17,085 (trainer:732) INFO: 25epoch:train:3301-3400batch: iter_time=1.283, forward_time=0.146, loss_ctc=63.050, loss_att=54.013, acc=0.684, loss=56.724, backward_time=1.043, grad_norm=89.866, clip=100.000, loss_scale=1.889e+22, optim_step_time=0.183, optim0_lr0=7.319e-05, train_time=8.981 +[gpub005:0/64] 2023-07-08 07:29:33,399 (trainer:732) INFO: 25epoch:train:3401-3500batch: iter_time=1.127e-04, forward_time=0.146, loss_ctc=81.752, loss_att=62.301, acc=0.685, loss=68.136, backward_time=1.029, grad_norm=109.748, clip=100.000, loss_scale=1.889e+22, optim_step_time=0.183, optim0_lr0=7.317e-05, train_time=2.726 +[gpub005:0/64] 2023-07-08 07:31:49,292 (trainer:732) INFO: 25epoch:train:3501-3600batch: iter_time=1.184e-04, forward_time=0.147, loss_ctc=74.281, loss_att=56.625, acc=0.693, loss=61.921, backward_time=1.027, grad_norm=105.121, clip=100.000, loss_scale=1.889e+22, optim_step_time=0.183, optim0_lr0=7.316e-05, train_time=2.718 +[gpub005:0/64] 2023-07-08 07:34:05,347 (trainer:732) INFO: 25epoch:train:3601-3700batch: iter_time=1.278e-04, forward_time=0.146, loss_ctc=75.075, loss_att=55.767, acc=0.693, loss=61.559, backward_time=1.030, grad_norm=97.157, clip=100.000, loss_scale=1.889e+22, optim_step_time=0.183, optim0_lr0=7.314e-05, train_time=2.721 +[gpub005:0/64] 2023-07-08 07:36:20,907 (trainer:732) INFO: 25epoch:train:3701-3800batch: iter_time=1.206e-04, forward_time=0.147, loss_ctc=69.361, loss_att=52.975, acc=0.686, loss=57.891, backward_time=1.026, grad_norm=97.077, clip=100.000, loss_scale=1.889e+22, optim_step_time=0.183, optim0_lr0=7.313e-05, train_time=2.711 +[gpub005:0/64] 2023-07-08 07:38:36,925 (trainer:732) INFO: 25epoch:train:3801-3900batch: iter_time=1.209e-04, 
forward_time=0.147, loss_ctc=69.622, loss_att=55.526, acc=0.675, loss=59.755, backward_time=1.030, grad_norm=104.909, clip=100.000, loss_scale=1.889e+22, optim_step_time=0.183, optim0_lr0=7.311e-05, train_time=2.720 +[gpub005:0/64] 2023-07-08 07:40:52,788 (trainer:732) INFO: 25epoch:train:3901-4000batch: iter_time=1.126e-04, forward_time=0.147, loss_ctc=77.126, loss_att=58.450, acc=0.698, loss=64.053, backward_time=1.028, grad_norm=127.297, clip=100.000, loss_scale=1.889e+22, optim_step_time=0.183, optim0_lr0=7.310e-05, train_time=2.717 +[gpub005:0/64] 2023-07-08 07:43:08,833 (trainer:732) INFO: 25epoch:train:4001-4100batch: iter_time=1.138e-04, forward_time=0.147, loss_ctc=70.297, loss_att=52.546, acc=0.700, loss=57.871, backward_time=1.031, grad_norm=108.860, clip=100.000, loss_scale=1.889e+22, optim_step_time=0.183, optim0_lr0=7.308e-05, train_time=2.721 +[gpub005:0/64] 2023-07-08 07:44:41,113 (multiple_iter_factory:32) INFO: Building 5th iter-factory... +[gpub005:0/64] 2023-07-08 07:44:59,453 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub005:0/64] 2023-07-08 07:45:02,893 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"} + preprocess: ) +[gpub005:0/64] 2023-07-08 07:45:02,893 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2, +[gpub005:0/64] 2023-07-08 07:45:02,899 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub005:0/64] 2023-07-08 07:48:00,388 (trainer:732) INFO: 25epoch:train:4101-4200batch: iter_time=1.244, forward_time=0.152, loss_ctc=63.813, loss_att=47.498, acc=0.698, loss=52.392, backward_time=1.044, grad_norm=101.569, clip=100.000, loss_scale=1.889e+22, optim_step_time=0.183, optim0_lr0=7.306e-05, train_time=5.831 +[gpub005:0/64] 2023-07-08 07:50:17,294 (trainer:732) INFO: 25epoch:train:4201-4300batch: iter_time=1.143e-04, forward_time=0.145, loss_ctc=74.377, loss_att=62.988, acc=0.675, loss=66.405, backward_time=1.031, grad_norm=106.091, clip=100.000, loss_scale=1.889e+22, optim_step_time=0.183, optim0_lr0=7.305e-05, train_time=2.738 +[gpub005:0/64] 2023-07-08 07:52:33,320 (trainer:732) INFO: 25epoch:train:4301-4400batch: iter_time=1.159e-04, forward_time=0.145, loss_ctc=78.418, loss_att=54.676, acc=0.694, loss=61.798, backward_time=1.026, grad_norm=105.223, clip=100.000, loss_scale=1.889e+22, optim_step_time=0.183, optim0_lr0=7.303e-05, train_time=2.720 +[gpub005:0/64] 2023-07-08 07:54:49,532 (trainer:732) INFO: 25epoch:train:4401-4500batch: iter_time=1.139e-04, forward_time=0.145, loss_ctc=75.077, loss_att=55.026, acc=0.702, loss=61.042, backward_time=1.033, grad_norm=101.895, clip=100.000, loss_scale=1.889e+22, optim_step_time=0.183, optim0_lr0=7.302e-05, train_time=2.724 +[gpub005:0/64] 2023-07-08 07:57:04,904 (trainer:732) INFO: 25epoch:train:4501-4600batch: iter_time=1.237e-04, forward_time=0.145, loss_ctc=69.245, loss_att=52.258, acc=0.691, loss=57.354, backward_time=1.024, grad_norm=112.451, clip=100.000, loss_scale=1.889e+22, optim_step_time=0.183, optim0_lr0=7.300e-05, 
train_time=2.707 +[gpub005:0/64] 2023-07-08 07:59:23,124 (trainer:732) INFO: 25epoch:train:4601-4700batch: iter_time=1.253e-04, forward_time=0.148, loss_ctc=67.163, loss_att=53.729, acc=0.683, loss=57.759, backward_time=1.031, grad_norm=104.056, clip=100.000, loss_scale=1.889e+22, optim_step_time=0.183, optim0_lr0=7.299e-05, train_time=2.764 +[gpub005:0/64] 2023-07-08 08:01:40,001 (trainer:732) INFO: 25epoch:train:4701-4800batch: iter_time=1.179e-04, forward_time=0.147, loss_ctc=75.490, loss_att=58.879, acc=0.703, loss=63.862, backward_time=1.029, grad_norm=103.480, clip=100.000, loss_scale=1.889e+22, optim_step_time=0.183, optim0_lr0=7.297e-05, train_time=2.737 +[gpub005:0/64] 2023-07-08 08:03:56,194 (trainer:732) INFO: 25epoch:train:4801-4900batch: iter_time=1.366e-04, forward_time=0.146, loss_ctc=74.857, loss_att=56.613, acc=0.698, loss=62.086, backward_time=1.033, grad_norm=105.020, clip=100.000, loss_scale=1.889e+22, optim_step_time=0.183, optim0_lr0=7.296e-05, train_time=2.724 +[gpub005:0/64] 2023-07-08 08:06:11,952 (trainer:732) INFO: 25epoch:train:4901-5000batch: iter_time=1.173e-04, forward_time=0.146, loss_ctc=67.657, loss_att=51.018, acc=0.697, loss=56.010, backward_time=1.027, grad_norm=92.263, clip=100.000, loss_scale=1.889e+22, optim_step_time=0.183, optim0_lr0=7.294e-05, train_time=2.715 +[gpub005:0/64] 2023-07-08 08:06:13,258 (multiple_iter_factory:32) INFO: Building 6th iter-factory... +[gpub005:0/64] 2023-07-08 08:06:31,398 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub005:0/64] 2023-07-08 08:06:34,942 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"} + preprocess: ) +[gpub005:0/64] 2023-07-08 08:06:34,942 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1, +[gpub005:0/64] 2023-07-08 08:06:34,948 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub005:0/64] 2023-07-08 08:11:35,592 (trainer:732) INFO: 25epoch:train:5001-5100batch: iter_time=1.225, forward_time=0.145, loss_ctc=64.552, loss_att=53.371, acc=0.688, loss=56.726, backward_time=1.044, grad_norm=89.113, clip=100.000, loss_scale=1.889e+22, optim_step_time=0.183, optim0_lr0=7.292e-05, train_time=6.473 +[gpub005:0/64] 2023-07-08 08:13:51,847 (trainer:732) INFO: 25epoch:train:5101-5200batch: iter_time=1.180e-04, forward_time=0.146, loss_ctc=79.661, loss_att=57.127, acc=0.702, loss=63.887, backward_time=1.031, grad_norm=107.156, clip=100.000, loss_scale=1.889e+22, optim_step_time=0.183, optim0_lr0=7.291e-05, train_time=2.725 +[gpub005:0/64] 2023-07-08 08:16:08,081 (trainer:732) INFO: 25epoch:train:5201-5300batch: iter_time=1.105e-04, forward_time=0.147, loss_ctc=76.918, loss_att=60.190, acc=0.694, loss=65.209, backward_time=1.031, grad_norm=114.919, clip=100.000, loss_scale=1.889e+22, optim_step_time=0.183, optim0_lr0=7.289e-05, train_time=2.724 +[gpub005:0/64] 2023-07-08 08:18:23,889 (trainer:732) INFO: 25epoch:train:5301-5400batch: iter_time=1.060e-04, forward_time=0.146, loss_ctc=68.186, loss_att=50.614, acc=0.703, 
loss=55.886, backward_time=1.027, grad_norm=98.722, clip=100.000, loss_scale=1.889e+22, optim_step_time=0.183, optim0_lr0=7.288e-05, train_time=2.716 +[gpub005:0/64] 2023-07-08 08:20:39,880 (trainer:732) INFO: 25epoch:train:5401-5500batch: iter_time=1.119e-04, forward_time=0.148, loss_ctc=68.353, loss_att=52.983, acc=0.701, loss=57.594, backward_time=1.028, grad_norm=93.295, clip=100.000, loss_scale=1.889e+22, optim_step_time=0.183, optim0_lr0=7.286e-05, train_time=2.720 +[gpub005:0/64] 2023-07-08 08:22:56,095 (trainer:732) INFO: 25epoch:train:5501-5600batch: iter_time=1.114e-04, forward_time=0.147, loss_ctc=72.638, loss_att=57.921, acc=0.694, loss=62.336, backward_time=1.032, grad_norm=118.868, clip=100.000, loss_scale=1.889e+22, optim_step_time=0.183, optim0_lr0=7.285e-05, train_time=2.724 +[gpub005:0/64] 2023-07-08 08:25:12,299 (trainer:732) INFO: 25epoch:train:5601-5700batch: iter_time=1.169e-04, forward_time=0.146, loss_ctc=72.064, loss_att=54.239, acc=0.715, loss=59.587, backward_time=1.032, grad_norm=100.871, clip=100.000, loss_scale=1.889e+22, optim_step_time=0.183, optim0_lr0=7.283e-05, train_time=2.724 +[gpub005:0/64] 2023-07-08 08:27:28,098 (trainer:732) INFO: 25epoch:train:5701-5800batch: iter_time=1.043e-04, forward_time=0.146, loss_ctc=70.937, loss_att=53.502, acc=0.705, loss=58.733, backward_time=1.029, grad_norm=105.846, clip=100.000, loss_scale=1.889e+22, optim_step_time=0.183, optim0_lr0=7.282e-05, train_time=2.716 +[gpub005:0/64] 2023-07-08 08:28:14,331 (multiple_iter_factory:32) INFO: Building 7th iter-factory... +[gpub005:0/64] 2023-07-08 08:28:32,548 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub005:0/64] 2023-07-08 08:28:36,003 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"} + preprocess: ) +[gpub005:0/64] 2023-07-08 08:28:36,003 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6, +[gpub005:0/64] 2023-07-08 08:28:36,010 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub005:0/64] 2023-07-08 08:32:06,642 (trainer:732) INFO: 25epoch:train:5801-5900batch: iter_time=1.229, forward_time=0.168, loss_ctc=61.899, loss_att=49.951, acc=0.692, loss=53.536, backward_time=1.043, grad_norm=114.240, clip=100.000, loss_scale=1.889e+22, optim_step_time=0.184, optim0_lr0=7.280e-05, train_time=5.570 +[gpub005:0/64] 2023-07-08 08:34:23,716 (trainer:732) INFO: 25epoch:train:5901-6000batch: iter_time=1.036e-04, forward_time=0.146, loss_ctc=80.144, loss_att=59.966, acc=0.694, loss=66.019, backward_time=1.033, grad_norm=117.796, clip=100.000, loss_scale=1.889e+22, optim_step_time=0.183, optim0_lr0=7.279e-05, train_time=2.741 +[gpub005:0/64] 2023-07-08 08:36:39,086 (trainer:732) INFO: 25epoch:train:6001-6100batch: iter_time=1.081e-04, forward_time=0.145, loss_ctc=74.687, loss_att=55.806, acc=0.699, loss=61.470, backward_time=1.025, grad_norm=102.244, clip=100.000, loss_scale=3.778e+22, optim_step_time=0.183, optim0_lr0=7.277e-05, train_time=2.707 +[gpub005:0/64] 2023-07-08 08:38:54,592 (trainer:732) 
INFO: 25epoch:train:6101-6200batch: iter_time=1.234e-04, forward_time=0.146, loss_ctc=74.301, loss_att=54.846, acc=0.696, loss=60.682, backward_time=1.026, grad_norm=93.497, clip=100.000, loss_scale=3.778e+22, optim_step_time=0.183, optim0_lr0=7.275e-05, train_time=2.710 +[gpub005:0/64] 2023-07-08 08:41:10,050 (trainer:732) INFO: 25epoch:train:6201-6300batch: iter_time=1.090e-04, forward_time=0.145, loss_ctc=69.302, loss_att=51.670, acc=0.693, loss=56.959, backward_time=1.027, grad_norm=100.115, clip=100.000, loss_scale=3.778e+22, optim_step_time=0.183, optim0_lr0=7.274e-05, train_time=2.709 +[gpub005:0/64] 2023-07-08 08:43:26,047 (trainer:732) INFO: 25epoch:train:6301-6400batch: iter_time=1.119e-04, forward_time=0.145, loss_ctc=70.805, loss_att=56.455, acc=0.675, loss=60.760, backward_time=1.029, grad_norm=84.726, clip=100.000, loss_scale=3.778e+22, optim_step_time=0.183, optim0_lr0=7.272e-05, train_time=2.720 +[gpub005:0/64] 2023-07-08 08:45:41,895 (trainer:732) INFO: 25epoch:train:6401-6500batch: iter_time=1.251e-04, forward_time=0.144, loss_ctc=75.784, loss_att=56.769, acc=0.708, loss=62.473, backward_time=1.028, grad_norm=113.466, clip=100.000, loss_scale=3.778e+22, optim_step_time=0.183, optim0_lr0=7.271e-05, train_time=2.717 +[gpub005:0/64] 2023-07-08 08:47:57,577 (trainer:732) INFO: 25epoch:train:6501-6600batch: iter_time=1.128e-04, forward_time=0.145, loss_ctc=69.709, loss_att=52.710, acc=0.701, loss=57.809, backward_time=1.027, grad_norm=106.183, clip=100.000, loss_scale=3.778e+22, optim_step_time=0.183, optim0_lr0=7.269e-05, train_time=2.713 +[gpub005:0/64] 2023-07-08 08:49:31,994 (multiple_iter_factory:32) INFO: Building 8th iter-factory... +[gpub005:0/64] 2023-07-08 08:49:50,303 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub005:0/64] 2023-07-08 08:49:53,685 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"} + preprocess: ) +[gpub005:0/64] 2023-07-08 08:49:53,685 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8, +[gpub005:0/64] 2023-07-08 08:49:53,692 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub005:0/64] 2023-07-08 08:53:40,732 (trainer:732) INFO: 25epoch:train:6601-6700batch: iter_time=1.490, forward_time=0.185, loss_ctc=61.334, loss_att=48.253, acc=0.699, loss=52.177, backward_time=1.043, grad_norm=98.206, clip=100.000, loss_scale=3.778e+22, optim_step_time=0.185, optim0_lr0=7.268e-05, train_time=6.863 +[gpub005:0/64] 2023-07-08 08:55:57,864 (trainer:732) INFO: 25epoch:train:6701-6800batch: iter_time=1.189e-04, forward_time=0.147, loss_ctc=76.795, loss_att=61.313, acc=0.677, loss=65.958, backward_time=1.034, grad_norm=106.691, clip=100.000, loss_scale=3.778e+22, optim_step_time=0.183, optim0_lr0=7.266e-05, train_time=2.742 +[gpub005:0/64] 2023-07-08 08:58:14,559 (trainer:732) INFO: 25epoch:train:6801-6900batch: iter_time=1.189e-04, forward_time=0.145, loss_ctc=76.474, loss_att=54.969, acc=0.701, loss=61.420, backward_time=1.027, grad_norm=92.293, clip=100.000, 
loss_scale=3.778e+22, optim_step_time=0.183, optim0_lr0=7.265e-05, train_time=2.734 +[gpub005:0/64] 2023-07-08 09:00:30,838 (trainer:732) INFO: 25epoch:train:6901-7000batch: iter_time=1.137e-04, forward_time=0.146, loss_ctc=74.051, loss_att=55.688, acc=0.700, loss=61.197, backward_time=1.029, grad_norm=124.427, clip=100.000, loss_scale=3.778e+22, optim_step_time=0.183, optim0_lr0=7.263e-05, train_time=2.725 +[gpub005:0/64] 2023-07-08 09:02:46,205 (trainer:732) INFO: 25epoch:train:7001-7100batch: iter_time=1.143e-04, forward_time=0.145, loss_ctc=69.640, loss_att=51.459, acc=0.684, loss=56.913, backward_time=1.025, grad_norm=101.859, clip=100.000, loss_scale=3.778e+22, optim_step_time=0.183, optim0_lr0=7.262e-05, train_time=2.707 +[gpub005:0/64] 2023-07-08 09:05:02,287 (trainer:732) INFO: 25epoch:train:7101-7200batch: iter_time=1.126e-04, forward_time=0.145, loss_ctc=67.988, loss_att=53.614, acc=0.682, loss=57.926, backward_time=1.030, grad_norm=98.586, clip=100.000, loss_scale=3.778e+22, optim_step_time=0.183, optim0_lr0=7.260e-05, train_time=2.721 +[gpub005:0/64] 2023-07-08 09:07:18,114 (trainer:732) INFO: 25epoch:train:7201-7300batch: iter_time=1.140e-04, forward_time=0.146, loss_ctc=78.895, loss_att=59.924, acc=0.702, loss=65.615, backward_time=1.029, grad_norm=107.921, clip=100.000, loss_scale=3.778e+22, optim_step_time=0.183, optim0_lr0=7.259e-05, train_time=2.716 +[gpub005:0/64] 2023-07-08 09:09:34,216 (trainer:732) INFO: 25epoch:train:7301-7400batch: iter_time=1.041e-04, forward_time=0.147, loss_ctc=68.386, loss_att=52.947, acc=0.699, loss=57.579, backward_time=1.029, grad_norm=100.878, clip=100.000, loss_scale=3.778e+22, optim_step_time=0.183, optim0_lr0=7.257e-05, train_time=2.722 +[gpub005:0/64] 2023-07-08 09:11:57,640 (trainer:732) INFO: 25epoch:train:7401-7500batch: iter_time=1.042e-04, forward_time=0.147, loss_ctc=65.301, loss_att=50.575, acc=0.707, loss=54.993, backward_time=1.045, grad_norm=94.219, clip=100.000, loss_scale=3.778e+22, optim_step_time=0.183, optim0_lr0=7.255e-05, train_time=2.868 +[gpub005:0/64] 2023-07-08 09:11:59,129 (multiple_iter_factory:32) INFO: Building 9th iter-factory... 
+[gpub005:0/64] 2023-07-08 09:12:17,091 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub005:0/64] 2023-07-08 09:12:20,533 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"} + preprocess: ) +[gpub005:0/64] 2023-07-08 09:12:20,534 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11, +[gpub005:0/64] 2023-07-08 09:12:20,540 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub005:0/64] 2023-07-08 09:19:22,003 (trainer:732) INFO: 25epoch:train:7501-7600batch: iter_time=1.244, forward_time=0.146, loss_ctc=70.509, loss_att=61.157, acc=0.678, loss=63.963, backward_time=1.041, grad_norm=134.173, clip=100.000, loss_scale=3.778e+22, optim_step_time=0.183, optim0_lr0=7.254e-05, train_time=8.887 +[gpub005:0/64] 2023-07-08 09:21:38,576 (trainer:732) INFO: 25epoch:train:7601-7700batch: iter_time=1.115e-04, forward_time=0.146, loss_ctc=78.059, loss_att=56.103, acc=0.702, loss=62.690, backward_time=1.028, grad_norm=103.019, clip=100.000, loss_scale=3.778e+22, optim_step_time=0.183, optim0_lr0=7.252e-05, train_time=2.731 +[gpub005:0/64] 2023-07-08 09:23:54,909 (trainer:732) INFO: 25epoch:train:7701-7800batch: iter_time=1.239e-04, forward_time=0.148, loss_ctc=72.875, loss_att=55.188, acc=0.704, loss=60.494, backward_time=1.028, grad_norm=104.478, clip=100.000, loss_scale=3.778e+22, optim_step_time=0.183, optim0_lr0=7.251e-05, train_time=2.726 +[gpub005:0/64] 2023-07-08 09:26:13,203 (trainer:732) INFO: 25epoch:train:7801-7900batch: iter_time=1.334e-04, forward_time=0.145, loss_ctc=69.878, loss_att=53.424, acc=0.699, loss=58.360, backward_time=1.025, grad_norm=132.395, clip=100.000, loss_scale=3.778e+22, optim_step_time=0.183, optim0_lr0=7.249e-05, train_time=2.766 +[gpub005:0/64] 2023-07-08 09:28:29,065 (trainer:732) INFO: 25epoch:train:7901-8000batch: iter_time=1.246e-04, forward_time=0.147, loss_ctc=66.722, loss_att=53.254, acc=0.693, loss=57.294, backward_time=1.026, grad_norm=96.573, clip=100.000, loss_scale=3.778e+22, optim_step_time=0.183, optim0_lr0=7.248e-05, train_time=2.717 +[gpub005:0/64] 2023-07-08 09:30:45,214 (trainer:732) INFO: 25epoch:train:8001-8100batch: iter_time=1.310e-04, forward_time=0.147, loss_ctc=72.346, loss_att=57.409, acc=0.709, loss=61.890, backward_time=1.028, grad_norm=100.161, clip=100.000, loss_scale=3.778e+22, optim_step_time=0.183, optim0_lr0=7.246e-05, train_time=2.723 +[gpub005:0/64] 2023-07-08 09:33:01,171 (trainer:732) INFO: 25epoch:train:8101-8200batch: iter_time=1.067e-04, forward_time=0.147, loss_ctc=75.162, loss_att=59.259, acc=0.708, loss=64.030, backward_time=1.029, grad_norm=104.558, clip=100.000, loss_scale=3.778e+22, optim_step_time=0.183, optim0_lr0=7.245e-05, train_time=2.719 +[gpub005:0/64] 2023-07-08 09:35:16,902 (trainer:732) INFO: 25epoch:train:8201-8300batch: iter_time=1.014e-04, forward_time=0.146, loss_ctc=63.470, loss_att=43.141, acc=0.713, loss=49.240, backward_time=1.028, grad_norm=88.867, clip=100.000, loss_scale=3.778e+22, optim_step_time=0.183, 
optim0_lr0=7.243e-05, train_time=2.714 +[gpub005:0/64] 2023-07-08 09:36:06,322 (multiple_iter_factory:32) INFO: Building 10th iter-factory... +[gpub005:0/64] 2023-07-08 09:36:24,330 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub005:0/64] 2023-07-08 09:36:28,070 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"} + preprocess: ) +[gpub005:0/64] 2023-07-08 09:36:28,070 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7, +[gpub005:0/64] 2023-07-08 09:36:28,077 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub005:0/64] 2023-07-08 09:41:52,004 (trainer:732) INFO: 25epoch:train:8301-8400batch: iter_time=1.423, forward_time=0.186, loss_ctc=61.528, loss_att=54.110, acc=0.687, loss=56.335, backward_time=1.049, grad_norm=94.433, clip=100.000, loss_scale=3.778e+22, optim_step_time=0.184, optim0_lr0=7.242e-05, train_time=7.902 +[gpub005:0/64] 2023-07-08 09:44:08,921 (trainer:732) INFO: 25epoch:train:8401-8500batch: iter_time=1.226e-04, forward_time=0.147, loss_ctc=80.713, loss_att=60.130, acc=0.699, loss=66.305, backward_time=1.028, grad_norm=101.118, clip=100.000, loss_scale=3.778e+22, optim_step_time=0.183, optim0_lr0=7.240e-05, train_time=2.738 +[gpub005:0/64] 2023-07-08 09:46:25,326 (trainer:732) INFO: 25epoch:train:8501-8600batch: iter_time=1.084e-04, forward_time=0.146, loss_ctc=72.864, loss_att=55.261, acc=0.709, loss=60.542, backward_time=1.029, grad_norm=110.946, clip=100.000, loss_scale=3.778e+22, optim_step_time=0.183, optim0_lr0=7.239e-05, train_time=2.728 +[gpub005:0/64] 2023-07-08 09:48:41,215 (trainer:732) INFO: 25epoch:train:8601-8700batch: iter_time=1.034e-04, forward_time=0.145, loss_ctc=74.393, loss_att=55.528, acc=0.705, loss=61.188, backward_time=1.028, grad_norm=123.026, clip=100.000, loss_scale=3.778e+22, optim_step_time=0.183, optim0_lr0=7.237e-05, train_time=2.718 +[gpub005:0/64] 2023-07-08 09:50:57,064 (trainer:732) INFO: 25epoch:train:8701-8800batch: iter_time=1.017e-04, forward_time=0.145, loss_ctc=68.057, loss_att=52.431, acc=0.694, loss=57.119, backward_time=1.028, grad_norm=108.479, clip=100.000, loss_scale=3.778e+22, optim_step_time=0.183, optim0_lr0=7.236e-05, train_time=2.717 +[gpub005:0/64] 2023-07-08 09:53:13,155 (trainer:732) INFO: 25epoch:train:8801-8900batch: iter_time=1.123e-04, forward_time=0.147, loss_ctc=69.612, loss_att=54.933, acc=0.693, loss=59.336, backward_time=1.030, grad_norm=97.704, clip=100.000, loss_scale=3.778e+22, optim_step_time=0.183, optim0_lr0=7.234e-05, train_time=2.722 +[gpub005:0/64] 2023-07-08 09:55:29,117 (trainer:732) INFO: 25epoch:train:8901-9000batch: iter_time=1.064e-04, forward_time=0.146, loss_ctc=74.272, loss_att=56.728, acc=0.717, loss=61.992, backward_time=1.029, grad_norm=139.457, clip=100.000, loss_scale=3.778e+22, optim_step_time=0.183, optim0_lr0=7.233e-05, train_time=2.719 +[gpub005:0/64] 2023-07-08 09:57:44,878 (trainer:732) INFO: 25epoch:train:9001-9100batch: iter_time=1.241e-04, forward_time=0.148, loss_ctc=69.803, loss_att=53.448, 
acc=0.703, loss=58.354, backward_time=1.027, grad_norm=100.770, clip=100.000, loss_scale=3.778e+22, optim_step_time=0.183, optim0_lr0=7.231e-05, train_time=2.715 +[gpub005:0/64] 2023-07-08 09:59:28,828 (multiple_iter_factory:32) INFO: Building 11th iter-factory... +[gpub005:0/64] 2023-07-08 09:59:46,947 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub005:0/64] 2023-07-08 09:59:50,459 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"} + preprocess: ) +[gpub005:0/64] 2023-07-08 09:59:50,460 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5, +[gpub005:0/64] 2023-07-08 09:59:50,466 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub005:0/64] 2023-07-08 10:04:57,427 (trainer:732) INFO: 25epoch:train:9101-9200batch: iter_time=2.123, forward_time=0.149, loss_ctc=61.440, loss_att=46.047, acc=0.707, loss=50.665, backward_time=1.039, grad_norm=93.648, clip=100.000, loss_scale=3.778e+22, optim_step_time=0.183, optim0_lr0=7.230e-05, train_time=8.651 +[gpub005:0/64] 2023-07-08 10:07:16,844 (trainer:732) INFO: 25epoch:train:9201-9300batch: iter_time=1.315e-04, forward_time=0.147, loss_ctc=70.907, loss_att=60.317, acc=0.693, loss=63.494, backward_time=1.032, grad_norm=107.964, clip=100.000, loss_scale=3.778e+22, optim_step_time=0.183, optim0_lr0=7.228e-05, train_time=2.788 +[gpub005:0/64] 2023-07-08 10:10:27,961 (trainer:732) INFO: 25epoch:train:9301-9400batch: iter_time=1.220e-04, forward_time=0.148, loss_ctc=76.024, loss_att=54.562, acc=0.702, loss=61.001, backward_time=1.074, grad_norm=98.849, clip=100.000, loss_scale=3.778e+22, optim_step_time=0.183, optim0_lr0=7.227e-05, train_time=3.822 +[gpub005:0/64] 2023-07-08 10:13:29,821 (trainer:732) INFO: 25epoch:train:9401-9500batch: iter_time=1.006e-04, forward_time=0.145, loss_ctc=74.449, loss_att=55.408, acc=0.705, loss=61.120, backward_time=1.077, grad_norm=113.210, clip=100.000, loss_scale=3.778e+22, optim_step_time=0.183, optim0_lr0=7.225e-05, train_time=3.637 +[gpub005:0/64] 2023-07-08 10:16:27,233 (trainer:732) INFO: 25epoch:train:9501-9600batch: iter_time=1.013e-04, forward_time=0.144, loss_ctc=69.774, loss_att=53.169, acc=0.695, loss=58.150, backward_time=1.088, grad_norm=100.371, clip=100.000, loss_scale=3.778e+22, optim_step_time=0.183, optim0_lr0=7.224e-05, train_time=3.548 +[gpub005:0/64] 2023-07-08 10:19:25,557 (trainer:732) INFO: 25epoch:train:9601-9700batch: iter_time=9.534e-05, forward_time=0.146, loss_ctc=67.052, loss_att=52.342, acc=0.696, loss=56.755, backward_time=1.091, grad_norm=115.351, clip=100.000, loss_scale=3.778e+22, optim_step_time=0.183, optim0_lr0=7.222e-05, train_time=3.566 +[gpub005:0/64] 2023-07-08 10:22:24,138 (trainer:732) INFO: 25epoch:train:9701-9800batch: iter_time=9.886e-05, forward_time=0.146, loss_ctc=75.203, loss_att=60.399, acc=0.707, loss=64.840, backward_time=1.082, grad_norm=99.748, clip=100.000, loss_scale=3.778e+22, optim_step_time=0.183, optim0_lr0=7.221e-05, train_time=3.571 +[gpub005:0/64] 2023-07-08 10:25:09,252 
(trainer:732) INFO: 25epoch:train:9801-9900batch: iter_time=1.005e-04, forward_time=0.146, loss_ctc=73.722, loss_att=56.459, acc=0.708, loss=61.638, backward_time=1.067, grad_norm=108.586, clip=100.000, loss_scale=3.778e+22, optim_step_time=0.183, optim0_lr0=7.219e-05, train_time=3.302 +[gpub005:0/64] 2023-07-08 10:27:55,647 (trainer:732) INFO: 25epoch:train:9901-10000batch: iter_time=9.428e-05, forward_time=0.145, loss_ctc=65.177, loss_att=47.351, acc=0.716, loss=52.698, backward_time=1.069, grad_norm=95.321, clip=100.000, loss_scale=3.778e+22, optim_step_time=0.183, optim0_lr0=7.218e-05, train_time=3.328 +[gpub005:0/64] 2023-07-08 10:42:07,209 (trainer:338) INFO: 25epoch results: [train] iter_time=0.176, forward_time=0.148, loss_ctc=71.983, loss_att=55.204, acc=0.695, loss=60.238, backward_time=1.035, grad_norm=104.818, clip=100.000, loss_scale=2.456e+22, optim_step_time=0.183, optim0_lr0=7.294e-05, train_time=3.336, time=4 hours, 38 minutes and 25.25 seconds, total_count=220000, gpu_max_cached_mem_GB=38.234, [valid] loss_ctc=47.009, cer_ctc=0.273, loss_att=42.530, acc=0.652, cer=0.442, wer=1.000, loss=43.874, time=7 minutes and 56.01 seconds, total_count=22770, gpu_max_cached_mem_GB=38.234, [att_plot] time=5 minutes and 52.76 seconds, total_count=0, gpu_max_cached_mem_GB=38.234 +[gpub005:0/64] 2023-07-08 10:42:26,240 (trainer:386) INFO: The best model has been updated: valid.total_count +[gpub005:0/64] 2023-07-08 10:42:26,246 (trainer:440) INFO: The model files were removed: exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/20epoch.pth +[gpub005:0/64] 2023-07-08 10:42:26,311 (trainer:272) INFO: 26/30epoch started. Estimated time to finish: 1 day, 15 minutes and 1.2 seconds +[gpub005:0/64] 2023-07-08 10:42:27,702 (multiple_iter_factory:32) INFO: Building 0th iter-factory... 
+[gpub005:0/64] 2023-07-08 10:42:46,699 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub005:0/64] 2023-07-08 10:42:50,222 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"} + preprocess: ) +[gpub005:0/64] 2023-07-08 10:42:50,222 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7, +[gpub005:0/64] 2023-07-08 10:42:50,310 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub005:0/64] 2023-07-08 10:53:23,692 (trainer:732) INFO: 26epoch:train:1-100batch: iter_time=5.145, forward_time=0.174, loss_ctc=69.387, loss_att=53.258, acc=0.713, loss=58.097, backward_time=1.043, grad_norm=87.743, clip=100.000, loss_scale=7.556e+22, optim_step_time=0.186, optim0_lr0=7.216e-05, train_time=13.133 +[gpub005:0/64] 2023-07-08 10:55:39,564 (trainer:732) INFO: 26epoch:train:101-200batch: iter_time=1.167e-04, forward_time=0.146, loss_ctc=69.584, loss_att=53.691, acc=0.707, loss=58.459, backward_time=1.030, grad_norm=113.614, clip=100.000, loss_scale=7.556e+22, optim_step_time=0.183, optim0_lr0=7.215e-05, train_time=2.717 +[gpub005:0/64] 2023-07-08 10:57:57,029 (trainer:732) INFO: 26epoch:train:201-300batch: iter_time=1.211e-04, forward_time=0.147, loss_ctc=76.842, loss_att=60.594, acc=0.691, loss=65.468, backward_time=1.031, grad_norm=97.403, clip=100.000, loss_scale=7.556e+22, optim_step_time=0.183, optim0_lr0=7.213e-05, train_time=2.749 +[gpub005:0/64] 2023-07-08 11:00:13,880 (trainer:732) INFO: 26epoch:train:301-400batch: iter_time=1.292e-04, forward_time=0.146, loss_ctc=75.295, loss_att=55.762, acc=0.698, loss=61.622, backward_time=1.028, grad_norm=130.883, clip=100.000, loss_scale=7.556e+22, optim_step_time=0.183, optim0_lr0=7.212e-05, train_time=2.737 +[gpub005:0/64] 2023-07-08 11:02:31,540 (trainer:732) INFO: 26epoch:train:401-500batch: iter_time=1.269e-04, forward_time=0.145, loss_ctc=71.385, loss_att=52.705, acc=0.691, loss=58.309, backward_time=1.030, grad_norm=118.277, clip=100.000, loss_scale=7.556e+22, optim_step_time=0.183, optim0_lr0=7.210e-05, train_time=2.753 +[gpub005:0/64] 2023-07-08 11:04:48,000 (trainer:732) INFO: 26epoch:train:501-600batch: iter_time=1.212e-04, forward_time=0.146, loss_ctc=86.870, loss_att=64.836, acc=0.690, loss=71.446, backward_time=1.030, grad_norm=142.009, clip=100.000, loss_scale=7.556e+22, optim_step_time=0.183, optim0_lr0=7.209e-05, train_time=2.729 +[gpub005:0/64] 2023-07-08 11:07:14,447 (trainer:732) INFO: 26epoch:train:601-700batch: iter_time=1.225e-04, forward_time=0.146, loss_ctc=75.534, loss_att=59.806, acc=0.685, loss=64.524, backward_time=1.037, grad_norm=135.306, clip=100.000, loss_scale=7.556e+22, optim_step_time=0.183, optim0_lr0=7.207e-05, train_time=2.929 +[gpub005:0/64] 2023-07-08 11:09:37,366 (trainer:732) INFO: 26epoch:train:701-800batch: iter_time=1.261e-04, forward_time=0.145, loss_ctc=75.179, loss_att=56.787, acc=0.705, loss=62.304, backward_time=1.047, grad_norm=102.383, clip=100.000, loss_scale=7.556e+22, optim_step_time=0.183, optim0_lr0=7.206e-05, 
train_time=2.858 +[gpub005:0/64] 2023-07-08 11:10:31,607 (multiple_iter_factory:32) INFO: Building 1th iter-factory... +[gpub005:0/64] 2023-07-08 11:10:49,546 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub005:0/64] 2023-07-08 11:10:52,871 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"} + preprocess: ) +[gpub005:0/64] 2023-07-08 11:10:52,871 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0, +[gpub005:0/64] 2023-07-08 11:10:52,877 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub005:0/64] 2023-07-08 11:14:37,121 (trainer:732) INFO: 26epoch:train:801-900batch: iter_time=1.279, forward_time=0.180, loss_ctc=66.178, loss_att=53.712, acc=0.706, loss=57.452, backward_time=1.040, grad_norm=98.261, clip=100.000, loss_scale=7.556e+22, optim_step_time=0.183, optim0_lr0=7.204e-05, train_time=5.995 +[gpub005:0/64] 2023-07-08 11:16:53,461 (trainer:732) INFO: 26epoch:train:901-1000batch: iter_time=1.033e-04, forward_time=0.146, loss_ctc=69.626, loss_att=51.851, acc=0.699, loss=57.184, backward_time=1.025, grad_norm=93.025, clip=100.000, loss_scale=7.556e+22, optim_step_time=0.183, optim0_lr0=7.203e-05, train_time=2.727 +[gpub005:0/64] 2023-07-08 11:19:09,392 (trainer:732) INFO: 26epoch:train:1001-1100batch: iter_time=1.097e-04, forward_time=0.146, loss_ctc=72.606, loss_att=58.385, acc=0.692, loss=62.651, backward_time=1.028, grad_norm=94.394, clip=100.000, loss_scale=7.556e+22, optim_step_time=0.183, optim0_lr0=7.201e-05, train_time=2.718 +[gpub005:0/64] 2023-07-08 11:21:25,206 (trainer:732) INFO: 26epoch:train:1101-1200batch: iter_time=1.058e-04, forward_time=0.145, loss_ctc=75.555, loss_att=58.013, acc=0.691, loss=63.275, backward_time=1.026, grad_norm=102.726, clip=100.000, loss_scale=7.556e+22, optim_step_time=0.183, optim0_lr0=7.200e-05, train_time=2.716 +[gpub005:0/64] 2023-07-08 11:23:40,877 (trainer:732) INFO: 26epoch:train:1201-1300batch: iter_time=1.002e-04, forward_time=0.146, loss_ctc=74.959, loss_att=55.140, acc=0.697, loss=61.086, backward_time=1.028, grad_norm=94.634, clip=100.000, loss_scale=7.556e+22, optim_step_time=0.183, optim0_lr0=7.198e-05, train_time=2.713 +[gpub005:0/64] 2023-07-08 11:25:56,605 (trainer:732) INFO: 26epoch:train:1301-1400batch: iter_time=1.218e-04, forward_time=0.146, loss_ctc=78.132, loss_att=58.606, acc=0.681, loss=64.464, backward_time=1.028, grad_norm=128.909, clip=100.000, loss_scale=7.556e+22, optim_step_time=0.183, optim0_lr0=7.197e-05, train_time=2.714 +[gpub005:0/64] 2023-07-08 11:28:12,321 (trainer:732) INFO: 26epoch:train:1401-1500batch: iter_time=1.088e-04, forward_time=0.146, loss_ctc=80.030, loss_att=66.092, acc=0.666, loss=70.273, backward_time=1.027, grad_norm=120.822, clip=100.000, loss_scale=7.556e+22, optim_step_time=0.183, optim0_lr0=7.195e-05, train_time=2.714 +[gpub005:0/64] 2023-07-08 11:30:27,679 (trainer:732) INFO: 26epoch:train:1501-1600batch: iter_time=1.163e-04, forward_time=0.145, loss_ctc=72.335, loss_att=51.339, acc=0.698, loss=57.638, 
backward_time=1.027, grad_norm=90.172, clip=100.000, loss_scale=7.556e+22, optim_step_time=0.183, optim0_lr0=7.194e-05, train_time=2.707 +[gpub005:0/64] 2023-07-08 11:32:05,236 (multiple_iter_factory:32) INFO: Building 2th iter-factory... +[gpub005:0/64] 2023-07-08 11:32:23,714 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub005:0/64] 2023-07-08 11:32:27,250 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"} + preprocess: ) +[gpub005:0/64] 2023-07-08 11:32:27,250 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8, +[gpub005:0/64] 2023-07-08 11:32:27,257 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub005:0/64] 2023-07-08 11:35:43,326 (trainer:732) INFO: 26epoch:train:1601-1700batch: iter_time=1.695, forward_time=0.145, loss_ctc=67.914, loss_att=52.867, acc=0.699, loss=57.381, backward_time=1.039, grad_norm=90.508, clip=100.000, loss_scale=7.556e+22, optim_step_time=0.183, optim0_lr0=7.192e-05, train_time=6.313 +[gpub005:0/64] 2023-07-08 11:37:59,366 (trainer:732) INFO: 26epoch:train:1701-1800batch: iter_time=1.022e-04, forward_time=0.145, loss_ctc=69.223, loss_att=53.087, acc=0.697, loss=57.928, backward_time=1.029, grad_norm=89.506, clip=100.000, loss_scale=7.556e+22, optim_step_time=0.183, optim0_lr0=7.191e-05, train_time=2.721 +[gpub005:0/64] 2023-07-08 11:40:15,245 (trainer:732) INFO: 26epoch:train:1801-1900batch: iter_time=9.824e-05, forward_time=0.145, loss_ctc=72.519, loss_att=57.553, acc=0.693, loss=62.043, backward_time=1.029, grad_norm=92.250, clip=100.000, loss_scale=7.556e+22, optim_step_time=0.183, optim0_lr0=7.189e-05, train_time=2.717 +[gpub005:0/64] 2023-07-08 11:42:31,327 (trainer:732) INFO: 26epoch:train:1901-2000batch: iter_time=9.918e-05, forward_time=0.146, loss_ctc=76.626, loss_att=56.219, acc=0.694, loss=62.341, backward_time=1.028, grad_norm=104.830, clip=100.000, loss_scale=7.556e+22, optim_step_time=0.183, optim0_lr0=7.188e-05, train_time=2.721 +[gpub005:0/64] 2023-07-08 11:44:46,889 (trainer:732) INFO: 26epoch:train:2001-2100batch: iter_time=1.021e-04, forward_time=0.145, loss_ctc=76.462, loss_att=56.079, acc=0.699, loss=62.194, backward_time=1.026, grad_norm=100.652, clip=100.000, loss_scale=7.556e+22, optim_step_time=0.183, optim0_lr0=7.186e-05, train_time=2.711 +[gpub005:0/64] 2023-07-08 11:47:02,624 (trainer:732) INFO: 26epoch:train:2101-2200batch: iter_time=1.040e-04, forward_time=0.146, loss_ctc=74.787, loss_att=56.538, acc=0.684, loss=62.013, backward_time=1.027, grad_norm=120.468, clip=100.000, loss_scale=7.556e+22, optim_step_time=0.183, optim0_lr0=7.185e-05, train_time=2.714 +[gpub005:0/64] 2023-07-08 11:49:18,264 (trainer:732) INFO: 26epoch:train:2201-2300batch: iter_time=1.109e-04, forward_time=0.145, loss_ctc=84.338, loss_att=68.512, acc=0.663, loss=73.260, backward_time=1.027, grad_norm=102.630, clip=100.000, loss_scale=7.556e+22, optim_step_time=0.183, optim0_lr0=7.183e-05, train_time=2.713 +[gpub005:0/64] 2023-07-08 11:51:33,885 (trainer:732) INFO: 
26epoch:train:2301-2400batch: iter_time=1.156e-04, forward_time=0.146, loss_ctc=70.380, loss_att=50.909, acc=0.699, loss=56.750, backward_time=1.027, grad_norm=96.571, clip=100.000, loss_scale=7.556e+22, optim_step_time=0.183, optim0_lr0=7.182e-05, train_time=2.712 +[gpub005:0/64] 2023-07-08 11:53:49,838 (trainer:732) INFO: 26epoch:train:2401-2500batch: iter_time=1.182e-04, forward_time=0.148, loss_ctc=69.633, loss_att=54.823, acc=0.700, loss=59.266, backward_time=1.028, grad_norm=116.785, clip=100.000, loss_scale=7.556e+22, optim_step_time=0.183, optim0_lr0=7.180e-05, train_time=2.719 +[gpub005:0/64] 2023-07-08 11:53:51,202 (multiple_iter_factory:32) INFO: Building 3th iter-factory... +[gpub005:0/64] 2023-07-08 11:54:09,386 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub005:0/64] 2023-07-08 11:54:12,870 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"} + preprocess: ) +[gpub005:0/64] 2023-07-08 11:54:12,870 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11, +[gpub005:0/64] 2023-07-08 11:54:12,876 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub005:0/64] 2023-07-08 11:59:26,483 (trainer:732) INFO: 26epoch:train:2501-2600batch: iter_time=1.206, forward_time=0.146, loss_ctc=72.363, loss_att=51.578, acc=0.716, loss=57.813, backward_time=1.040, grad_norm=117.065, clip=100.000, loss_scale=7.556e+22, optim_step_time=0.183, optim0_lr0=7.179e-05, train_time=6.733 +[gpub005:0/64] 2023-07-08 12:01:42,517 (trainer:732) INFO: 26epoch:train:2601-2700batch: iter_time=1.157e-04, forward_time=0.146, loss_ctc=68.902, loss_att=59.039, acc=0.707, loss=61.998, backward_time=1.028, grad_norm=96.352, clip=100.000, loss_scale=7.556e+22, optim_step_time=0.183, optim0_lr0=7.177e-05, train_time=2.720 +[gpub005:0/64] 2023-07-08 12:03:58,444 (trainer:732) INFO: 26epoch:train:2701-2800batch: iter_time=1.304e-04, forward_time=0.147, loss_ctc=79.158, loss_att=56.178, acc=0.715, loss=63.072, backward_time=1.028, grad_norm=99.364, clip=100.000, loss_scale=7.556e+22, optim_step_time=0.183, optim0_lr0=7.176e-05, train_time=2.718 +[gpub005:0/64] 2023-07-08 12:06:14,072 (trainer:732) INFO: 26epoch:train:2801-2900batch: iter_time=1.259e-04, forward_time=0.146, loss_ctc=69.645, loss_att=52.512, acc=0.705, loss=57.652, backward_time=1.027, grad_norm=91.072, clip=100.000, loss_scale=7.556e+22, optim_step_time=0.183, optim0_lr0=7.174e-05, train_time=2.712 +[gpub005:0/64] 2023-07-08 12:08:29,717 (trainer:732) INFO: 26epoch:train:2901-3000batch: iter_time=1.128e-04, forward_time=0.146, loss_ctc=71.834, loss_att=56.794, acc=0.687, loss=61.306, backward_time=1.027, grad_norm=93.266, clip=100.000, loss_scale=7.556e+22, optim_step_time=0.183, optim0_lr0=7.173e-05, train_time=2.713 +[gpub005:0/64] 2023-07-08 12:10:45,477 (trainer:732) INFO: 26epoch:train:3001-3100batch: iter_time=1.273e-04, forward_time=0.146, loss_ctc=81.950, loss_att=63.128, acc=0.688, loss=68.774, backward_time=1.029, grad_norm=123.293, clip=100.000, 
loss_scale=7.556e+22, optim_step_time=0.183, optim0_lr0=7.171e-05, train_time=2.715 +[gpub005:0/64] 2023-07-08 12:13:01,110 (trainer:732) INFO: 26epoch:train:3101-3200batch: iter_time=1.116e-04, forward_time=0.146, loss_ctc=72.267, loss_att=57.245, acc=0.702, loss=61.751, backward_time=1.027, grad_norm=98.018, clip=100.000, loss_scale=7.556e+22, optim_step_time=0.183, optim0_lr0=7.170e-05, train_time=2.712 +[gpub005:0/64] 2023-07-08 12:15:16,678 (trainer:732) INFO: 26epoch:train:3201-3300batch: iter_time=1.153e-04, forward_time=0.146, loss_ctc=69.021, loss_att=54.141, acc=0.701, loss=58.605, backward_time=1.027, grad_norm=97.100, clip=100.000, loss_scale=7.556e+22, optim_step_time=0.183, optim0_lr0=7.168e-05, train_time=2.711 +[gpub005:0/64] 2023-07-08 12:16:04,551 (multiple_iter_factory:32) INFO: Building 4th iter-factory... +[gpub005:0/64] 2023-07-08 12:16:23,248 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub005:0/64] 2023-07-08 12:16:26,719 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"} + preprocess: ) +[gpub005:0/64] 2023-07-08 12:16:26,719 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6, +[gpub005:0/64] 2023-07-08 12:16:26,725 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub005:0/64] 2023-07-08 12:20:37,309 (trainer:732) INFO: 26epoch:train:3301-3400batch: iter_time=1.265, forward_time=0.147, loss_ctc=73.247, loss_att=53.851, acc=0.718, loss=59.669, backward_time=1.041, grad_norm=101.473, clip=100.000, loss_scale=7.556e+22, optim_step_time=0.183, optim0_lr0=7.167e-05, train_time=6.412 +[gpub005:0/64] 2023-07-08 12:22:53,523 (trainer:732) INFO: 26epoch:train:3401-3500batch: iter_time=1.317e-04, forward_time=0.146, loss_ctc=67.909, loss_att=50.412, acc=0.701, loss=55.661, backward_time=1.026, grad_norm=114.248, clip=100.000, loss_scale=7.556e+22, optim_step_time=0.183, optim0_lr0=7.166e-05, train_time=2.724 +[gpub005:0/64] 2023-07-08 12:25:12,032 (trainer:732) INFO: 26epoch:train:3501-3600batch: iter_time=1.239e-04, forward_time=0.147, loss_ctc=72.018, loss_att=57.627, acc=0.696, loss=61.944, backward_time=1.031, grad_norm=94.403, clip=100.000, loss_scale=7.556e+22, optim_step_time=0.183, optim0_lr0=7.164e-05, train_time=2.770 +[gpub005:0/64] 2023-07-08 12:27:27,429 (trainer:732) INFO: 26epoch:train:3601-3700batch: iter_time=1.369e-04, forward_time=0.145, loss_ctc=76.212, loss_att=57.567, acc=0.692, loss=63.161, backward_time=1.026, grad_norm=110.612, clip=100.000, loss_scale=7.556e+22, optim_step_time=0.183, optim0_lr0=7.163e-05, train_time=2.708 +[gpub005:0/64] 2023-07-08 12:29:43,097 (trainer:732) INFO: 26epoch:train:3701-3800batch: iter_time=1.296e-04, forward_time=0.146, loss_ctc=73.717, loss_att=54.219, acc=0.698, loss=60.068, backward_time=1.027, grad_norm=105.595, clip=100.000, loss_scale=7.556e+22, optim_step_time=0.183, optim0_lr0=7.161e-05, train_time=2.713 +[gpub005:0/64] 2023-07-08 12:32:01,979 (trainer:732) INFO: 26epoch:train:3801-3900batch: iter_time=1.318e-04, 
forward_time=0.148, loss_ctc=75.248, loss_att=55.956, acc=0.692, loss=61.744, backward_time=1.028, grad_norm=155.550, clip=100.000, loss_scale=7.556e+22, optim_step_time=0.183, optim0_lr0=7.160e-05, train_time=2.777 +[gpub005:0/64] 2023-07-08 12:34:19,829 (trainer:732) INFO: 26epoch:train:3901-4000batch: iter_time=1.284e-04, forward_time=0.147, loss_ctc=81.079, loss_att=65.252, acc=0.669, loss=70.000, backward_time=1.027, grad_norm=104.122, clip=100.000, loss_scale=7.556e+22, optim_step_time=0.183, optim0_lr0=7.158e-05, train_time=2.757 +[gpub005:0/64] 2023-07-08 12:36:41,064 (trainer:732) INFO: 26epoch:train:4001-4100batch: iter_time=1.320e-04, forward_time=0.146, loss_ctc=71.146, loss_att=50.789, acc=0.703, loss=56.897, backward_time=1.035, grad_norm=105.814, clip=100.000, loss_scale=1.511e+23, optim_step_time=0.183, optim0_lr0=7.157e-05, train_time=2.824 +[gpub005:0/64] 2023-07-08 12:38:22,077 (multiple_iter_factory:32) INFO: Building 5th iter-factory... +[gpub005:0/64] 2023-07-08 12:38:40,154 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub005:0/64] 2023-07-08 12:38:43,616 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"} + preprocess: ) +[gpub005:0/64] 2023-07-08 12:38:43,616 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1, +[gpub005:0/64] 2023-07-08 12:38:43,622 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub005:0/64] 2023-07-08 12:44:13,729 (trainer:732) INFO: 26epoch:train:4101-4200batch: iter_time=1.254, forward_time=0.146, loss_ctc=65.207, loss_att=53.418, acc=0.701, loss=56.954, backward_time=1.057, grad_norm=94.878, clip=100.000, loss_scale=1.511e+23, optim_step_time=0.183, optim0_lr0=7.155e-05, train_time=9.053 +[gpub005:0/64] 2023-07-08 12:46:31,564 (trainer:732) INFO: 26epoch:train:4201-4300batch: iter_time=1.181e-04, forward_time=0.147, loss_ctc=69.637, loss_att=51.000, acc=0.720, loss=56.591, backward_time=1.028, grad_norm=91.646, clip=100.000, loss_scale=1.511e+23, optim_step_time=0.183, optim0_lr0=7.154e-05, train_time=2.756 +[gpub005:0/64] 2023-07-08 12:48:47,826 (trainer:732) INFO: 26epoch:train:4301-4400batch: iter_time=1.210e-04, forward_time=0.147, loss_ctc=69.877, loss_att=58.670, acc=0.710, loss=62.032, backward_time=1.031, grad_norm=90.287, clip=100.000, loss_scale=1.511e+23, optim_step_time=0.183, optim0_lr0=7.152e-05, train_time=2.725 +[gpub005:0/64] 2023-07-08 12:51:03,869 (trainer:732) INFO: 26epoch:train:4401-4500batch: iter_time=1.249e-04, forward_time=0.146, loss_ctc=78.707, loss_att=56.151, acc=0.712, loss=62.918, backward_time=1.028, grad_norm=100.064, clip=100.000, loss_scale=1.511e+23, optim_step_time=0.183, optim0_lr0=7.151e-05, train_time=2.721 +[gpub005:0/64] 2023-07-08 12:53:19,561 (trainer:732) INFO: 26epoch:train:4501-4600batch: iter_time=1.218e-04, forward_time=0.146, loss_ctc=72.489, loss_att=53.880, acc=0.706, loss=59.463, backward_time=1.028, grad_norm=96.597, clip=100.000, loss_scale=1.511e+23, optim_step_time=0.183, optim0_lr0=7.149e-05, 
train_time=2.714 +[gpub005:0/64] 2023-07-08 12:55:35,638 (trainer:732) INFO: 26epoch:train:4601-4700batch: iter_time=1.239e-04, forward_time=0.147, loss_ctc=69.209, loss_att=55.959, acc=0.692, loss=59.934, backward_time=1.030, grad_norm=104.066, clip=100.000, loss_scale=1.511e+23, optim_step_time=0.183, optim0_lr0=7.148e-05, train_time=2.721 +[gpub005:0/64] 2023-07-08 12:57:51,477 (trainer:732) INFO: 26epoch:train:4701-4800batch: iter_time=1.274e-04, forward_time=0.147, loss_ctc=79.306, loss_att=59.281, acc=0.689, loss=65.288, backward_time=1.028, grad_norm=107.090, clip=100.000, loss_scale=1.511e+23, optim_step_time=0.183, optim0_lr0=7.146e-05, train_time=2.717 +[gpub005:0/64] 2023-07-08 13:00:07,474 (trainer:732) INFO: 26epoch:train:4801-4900batch: iter_time=1.384e-04, forward_time=0.147, loss_ctc=73.038, loss_att=57.206, acc=0.700, loss=61.956, backward_time=1.030, grad_norm=99.761, clip=100.000, loss_scale=1.511e+23, optim_step_time=0.183, optim0_lr0=7.145e-05, train_time=2.720 +[gpub005:0/64] 2023-07-08 13:02:23,155 (trainer:732) INFO: 26epoch:train:4901-5000batch: iter_time=1.248e-04, forward_time=0.146, loss_ctc=69.379, loss_att=53.802, acc=0.705, loss=58.475, backward_time=1.028, grad_norm=104.017, clip=100.000, loss_scale=1.511e+23, optim_step_time=0.183, optim0_lr0=7.144e-05, train_time=2.713 +[gpub005:0/64] 2023-07-08 13:02:24,540 (multiple_iter_factory:32) INFO: Building 6th iter-factory... +[gpub005:0/64] 2023-07-08 13:02:42,839 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub005:0/64] 2023-07-08 13:02:46,436 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"} + preprocess: ) +[gpub005:0/64] 2023-07-08 13:02:46,436 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9, +[gpub005:0/64] 2023-07-08 13:02:46,442 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub005:0/64] 2023-07-08 13:07:28,194 (trainer:732) INFO: 26epoch:train:5001-5100batch: iter_time=1.260, forward_time=0.147, loss_ctc=73.292, loss_att=50.325, acc=0.720, loss=57.215, backward_time=1.044, grad_norm=94.971, clip=100.000, loss_scale=1.511e+23, optim_step_time=0.183, optim0_lr0=7.142e-05, train_time=6.101 +[gpub005:0/64] 2023-07-08 13:09:44,725 (trainer:732) INFO: 26epoch:train:5101-5200batch: iter_time=1.017e-04, forward_time=0.146, loss_ctc=69.927, loss_att=58.461, acc=0.708, loss=61.901, backward_time=1.030, grad_norm=103.765, clip=100.000, loss_scale=1.511e+23, optim_step_time=0.183, optim0_lr0=7.141e-05, train_time=2.730 +[gpub005:0/64] 2023-07-08 13:12:00,343 (trainer:732) INFO: 26epoch:train:5201-5300batch: iter_time=9.994e-05, forward_time=0.145, loss_ctc=80.359, loss_att=56.264, acc=0.712, loss=63.492, backward_time=1.026, grad_norm=116.586, clip=100.000, loss_scale=1.511e+23, optim_step_time=0.183, optim0_lr0=7.139e-05, train_time=2.712 +[gpub005:0/64] 2023-07-08 13:14:16,318 (trainer:732) INFO: 26epoch:train:5301-5400batch: iter_time=1.142e-04, forward_time=0.147, loss_ctc=69.708, loss_att=52.973, acc=0.705, 
loss=57.994, backward_time=1.028, grad_norm=105.492, clip=100.000, loss_scale=1.511e+23, optim_step_time=0.183, optim0_lr0=7.138e-05, train_time=2.719 +[gpub005:0/64] 2023-07-08 13:16:31,923 (trainer:732) INFO: 26epoch:train:5401-5500batch: iter_time=1.216e-04, forward_time=0.148, loss_ctc=70.245, loss_att=55.219, acc=0.686, loss=59.727, backward_time=1.026, grad_norm=110.833, clip=100.000, loss_scale=1.511e+23, optim_step_time=0.183, optim0_lr0=7.136e-05, train_time=2.712 +[gpub005:0/64] 2023-07-08 13:18:47,558 (trainer:732) INFO: 26epoch:train:5501-5600batch: iter_time=1.173e-04, forward_time=0.148, loss_ctc=80.307, loss_att=60.895, acc=0.691, loss=66.719, backward_time=1.026, grad_norm=99.104, clip=100.000, loss_scale=1.511e+23, optim_step_time=0.183, optim0_lr0=7.135e-05, train_time=2.712 +[gpub005:0/64] 2023-07-08 13:21:03,349 (trainer:732) INFO: 26epoch:train:5601-5700batch: iter_time=1.161e-04, forward_time=0.148, loss_ctc=70.927, loss_att=55.692, acc=0.706, loss=60.262, backward_time=1.027, grad_norm=104.300, clip=100.000, loss_scale=1.511e+23, optim_step_time=0.183, optim0_lr0=7.133e-05, train_time=2.716 +[gpub005:0/64] 2023-07-08 13:23:19,161 (trainer:732) INFO: 26epoch:train:5701-5800batch: iter_time=1.018e-04, forward_time=0.146, loss_ctc=67.813, loss_att=52.632, acc=0.705, loss=57.186, backward_time=1.026, grad_norm=101.610, clip=100.000, loss_scale=1.511e+23, optim_step_time=0.183, optim0_lr0=7.132e-05, train_time=2.716 +[gpub005:0/64] 2023-07-08 13:24:06,674 (multiple_iter_factory:32) INFO: Building 7th iter-factory... +[gpub005:0/64] 2023-07-08 13:24:24,853 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub005:0/64] 2023-07-08 13:24:28,314 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"} + preprocess: ) +[gpub005:0/64] 2023-07-08 13:24:28,315 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5, +[gpub005:0/64] 2023-07-08 13:24:28,321 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub005:0/64] 2023-07-08 13:29:09,299 (trainer:732) INFO: 26epoch:train:5801-5900batch: iter_time=1.271, forward_time=0.172, loss_ctc=73.165, loss_att=53.526, acc=0.722, loss=59.418, backward_time=1.037, grad_norm=95.823, clip=100.000, loss_scale=1.511e+23, optim_step_time=0.185, optim0_lr0=7.130e-05, train_time=7.002 +[gpub005:0/64] 2023-07-08 13:31:28,366 (trainer:732) INFO: 26epoch:train:5901-6000batch: iter_time=1.015e-04, forward_time=0.147, loss_ctc=67.679, loss_att=48.515, acc=0.726, loss=54.264, backward_time=1.030, grad_norm=99.757, clip=100.000, loss_scale=1.511e+23, optim_step_time=0.183, optim0_lr0=7.129e-05, train_time=2.782 +[gpub005:0/64] 2023-07-08 13:33:57,056 (trainer:732) INFO: 26epoch:train:6001-6100batch: iter_time=1.045e-04, forward_time=0.146, loss_ctc=72.320, loss_att=57.931, acc=0.702, loss=62.248, backward_time=1.035, grad_norm=96.806, clip=100.000, loss_scale=1.511e+23, optim_step_time=0.183, optim0_lr0=7.128e-05, train_time=2.974 +[gpub005:0/64] 2023-07-08 13:36:18,221 (trainer:732) 
INFO: 26epoch:train:6101-6200batch: iter_time=1.158e-04, forward_time=0.146, loss_ctc=74.820, loss_att=56.437, acc=0.706, loss=61.952, backward_time=1.041, grad_norm=104.704, clip=100.000, loss_scale=1.511e+23, optim_step_time=0.183, optim0_lr0=7.126e-05, train_time=2.823 +[gpub005:0/64] 2023-07-08 13:38:37,328 (trainer:732) INFO: 26epoch:train:6201-6300batch: iter_time=1.170e-04, forward_time=0.146, loss_ctc=74.027, loss_att=54.430, acc=0.705, loss=60.309, backward_time=1.037, grad_norm=95.467, clip=100.000, loss_scale=1.511e+23, optim_step_time=0.183, optim0_lr0=7.125e-05, train_time=2.782 +[gpub005:0/64] 2023-07-08 13:40:53,195 (trainer:732) INFO: 26epoch:train:6301-6400batch: iter_time=1.223e-04, forward_time=0.147, loss_ctc=74.703, loss_att=56.062, acc=0.699, loss=61.654, backward_time=1.027, grad_norm=102.603, clip=100.000, loss_scale=1.511e+23, optim_step_time=0.183, optim0_lr0=7.123e-05, train_time=2.717 +[gpub005:0/64] 2023-07-08 13:43:09,362 (trainer:732) INFO: 26epoch:train:6401-6500batch: iter_time=1.088e-04, forward_time=0.147, loss_ctc=78.713, loss_att=64.969, acc=0.686, loss=69.093, backward_time=1.028, grad_norm=104.530, clip=100.000, loss_scale=1.511e+23, optim_step_time=0.183, optim0_lr0=7.122e-05, train_time=2.723 +[gpub005:0/64] 2023-07-08 13:45:28,847 (trainer:732) INFO: 26epoch:train:6501-6600batch: iter_time=1.245e-04, forward_time=0.147, loss_ctc=69.965, loss_att=51.127, acc=0.717, loss=56.778, backward_time=1.043, grad_norm=82.428, clip=100.000, loss_scale=1.511e+23, optim_step_time=0.183, optim0_lr0=7.120e-05, train_time=2.789 +[gpub005:0/64] 2023-07-08 13:47:05,544 (multiple_iter_factory:32) INFO: Building 8th iter-factory... +[gpub005:0/64] 2023-07-08 13:47:23,796 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub005:0/64] 2023-07-08 13:47:27,253 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"} + preprocess: ) +[gpub005:0/64] 2023-07-08 13:47:27,253 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2, +[gpub005:0/64] 2023-07-08 13:47:27,260 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub005:0/64] 2023-07-08 13:50:28,307 (trainer:732) INFO: 26epoch:train:6601-6700batch: iter_time=1.259, forward_time=0.147, loss_ctc=64.311, loss_att=52.844, acc=0.702, loss=56.284, backward_time=1.044, grad_norm=99.413, clip=100.000, loss_scale=1.511e+23, optim_step_time=0.183, optim0_lr0=7.119e-05, train_time=5.989 +[gpub005:0/64] 2023-07-08 13:52:44,647 (trainer:732) INFO: 26epoch:train:6701-6800batch: iter_time=1.222e-04, forward_time=0.146, loss_ctc=69.999, loss_att=51.589, acc=0.707, loss=57.112, backward_time=1.028, grad_norm=108.206, clip=100.000, loss_scale=1.511e+23, optim_step_time=0.183, optim0_lr0=7.117e-05, train_time=2.727 +[gpub005:0/64] 2023-07-08 13:55:00,535 (trainer:732) INFO: 26epoch:train:6801-6900batch: iter_time=1.248e-04, forward_time=0.147, loss_ctc=68.683, loss_att=58.666, acc=0.702, loss=61.671, backward_time=1.027, grad_norm=95.657, clip=100.000, 
loss_scale=1.511e+23, optim_step_time=0.183, optim0_lr0=7.116e-05, train_time=2.718 +[gpub005:0/64] 2023-07-08 13:57:16,106 (trainer:732) INFO: 26epoch:train:6901-7000batch: iter_time=1.367e-04, forward_time=0.146, loss_ctc=79.555, loss_att=57.273, acc=0.699, loss=63.958, backward_time=1.027, grad_norm=106.510, clip=100.000, loss_scale=1.511e+23, optim_step_time=0.183, optim0_lr0=7.115e-05, train_time=2.711 +[gpub005:0/64] 2023-07-08 13:59:31,764 (trainer:732) INFO: 26epoch:train:7001-7100batch: iter_time=1.325e-04, forward_time=0.147, loss_ctc=71.814, loss_att=53.324, acc=0.706, loss=58.871, backward_time=1.027, grad_norm=103.081, clip=100.000, loss_scale=1.511e+23, optim_step_time=0.183, optim0_lr0=7.113e-05, train_time=2.713 +[gpub005:0/64] 2023-07-08 14:01:47,399 (trainer:732) INFO: 26epoch:train:7101-7200batch: iter_time=1.215e-04, forward_time=0.146, loss_ctc=69.657, loss_att=56.274, acc=0.687, loss=60.289, backward_time=1.027, grad_norm=96.955, clip=100.000, loss_scale=1.511e+23, optim_step_time=0.183, optim0_lr0=7.112e-05, train_time=2.712 +[gpub005:0/64] 2023-07-08 14:04:03,309 (trainer:732) INFO: 26epoch:train:7201-7300batch: iter_time=1.258e-04, forward_time=0.147, loss_ctc=77.898, loss_att=59.498, acc=0.680, loss=65.018, backward_time=1.029, grad_norm=113.679, clip=100.000, loss_scale=1.511e+23, optim_step_time=0.183, optim0_lr0=7.110e-05, train_time=2.718 +[gpub005:0/64] 2023-07-08 14:06:19,024 (trainer:732) INFO: 26epoch:train:7301-7400batch: iter_time=1.125e-04, forward_time=0.146, loss_ctc=72.524, loss_att=56.616, acc=0.693, loss=61.388, backward_time=1.028, grad_norm=93.674, clip=100.000, loss_scale=1.511e+23, optim_step_time=0.183, optim0_lr0=7.109e-05, train_time=2.714 +[gpub005:0/64] 2023-07-08 14:08:34,660 (trainer:732) INFO: 26epoch:train:7401-7500batch: iter_time=1.323e-04, forward_time=0.146, loss_ctc=69.537, loss_att=53.600, acc=0.701, loss=58.382, backward_time=1.027, grad_norm=106.828, clip=100.000, loss_scale=1.511e+23, optim_step_time=0.183, optim0_lr0=7.107e-05, train_time=2.712 +[gpub005:0/64] 2023-07-08 14:08:35,961 (multiple_iter_factory:32) INFO: Building 9th iter-factory... 
+[gpub005:0/64] 2023-07-08 14:08:54,239 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub005:0/64] 2023-07-08 14:08:57,934 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"} + preprocess: ) +[gpub005:0/64] 2023-07-08 14:08:57,934 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3, +[gpub005:0/64] 2023-07-08 14:08:57,940 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub005:0/64] 2023-07-08 14:15:22,244 (trainer:732) INFO: 26epoch:train:7501-7600batch: iter_time=1.758, forward_time=0.148, loss_ctc=68.459, loss_att=51.191, acc=0.727, loss=56.372, backward_time=1.038, grad_norm=87.229, clip=100.000, loss_scale=1.511e+23, optim_step_time=0.183, optim0_lr0=7.106e-05, train_time=8.151 +[gpub005:0/64] 2023-07-08 14:17:40,957 (trainer:732) INFO: 26epoch:train:7601-7700batch: iter_time=1.404e-04, forward_time=0.147, loss_ctc=68.155, loss_att=52.137, acc=0.719, loss=56.943, backward_time=1.029, grad_norm=151.653, clip=100.000, loss_scale=1.511e+23, optim_step_time=0.183, optim0_lr0=7.104e-05, train_time=2.774 +[gpub005:0/64] 2023-07-08 14:19:57,108 (trainer:732) INFO: 26epoch:train:7701-7800batch: iter_time=1.239e-04, forward_time=0.147, loss_ctc=75.907, loss_att=59.882, acc=0.701, loss=64.690, backward_time=1.028, grad_norm=99.037, clip=100.000, loss_scale=1.511e+23, optim_step_time=0.183, optim0_lr0=7.103e-05, train_time=2.723 +[gpub005:0/64] 2023-07-08 14:22:12,912 (trainer:732) INFO: 26epoch:train:7801-7900batch: iter_time=1.356e-04, forward_time=0.147, loss_ctc=70.855, loss_att=54.076, acc=0.710, loss=59.110, backward_time=1.028, grad_norm=103.081, clip=100.000, loss_scale=1.511e+23, optim_step_time=0.183, optim0_lr0=7.102e-05, train_time=2.716 +[gpub005:0/64] 2023-07-08 14:24:30,607 (trainer:732) INFO: 26epoch:train:7901-8000batch: iter_time=1.361e-04, forward_time=0.148, loss_ctc=68.206, loss_att=51.104, acc=0.702, loss=56.235, backward_time=1.031, grad_norm=99.810, clip=100.000, loss_scale=1.511e+23, optim_step_time=0.183, optim0_lr0=7.100e-05, train_time=2.754 +[gpub005:0/64] 2023-07-08 14:26:47,209 (trainer:732) INFO: 26epoch:train:8001-8100batch: iter_time=1.410e-04, forward_time=0.147, loss_ctc=82.712, loss_att=63.271, acc=0.695, loss=69.104, backward_time=1.026, grad_norm=111.254, clip=100.000, loss_scale=3.022e+23, optim_step_time=0.183, optim0_lr0=7.099e-05, train_time=2.732 +[gpub005:0/64] 2023-07-08 14:29:11,879 (trainer:732) INFO: 26epoch:train:8101-8200batch: iter_time=1.378e-04, forward_time=0.147, loss_ctc=73.581, loss_att=58.566, acc=0.694, loss=63.071, backward_time=1.037, grad_norm=97.879, clip=100.000, loss_scale=3.022e+23, optim_step_time=0.183, optim0_lr0=7.097e-05, train_time=2.893 +[gpub005:0/64] 2023-07-08 14:31:27,869 (trainer:732) INFO: 26epoch:train:8201-8300batch: iter_time=1.301e-04, forward_time=0.147, loss_ctc=73.403, loss_att=55.466, acc=0.709, loss=60.847, backward_time=1.028, grad_norm=102.034, clip=100.000, loss_scale=3.022e+23, optim_step_time=0.183, 
optim0_lr0=7.096e-05, train_time=2.720 +[gpub005:0/64] 2023-07-08 14:32:17,789 (multiple_iter_factory:32) INFO: Building 10th iter-factory... +[gpub005:0/64] 2023-07-08 14:32:35,784 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub005:0/64] 2023-07-08 14:32:39,537 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"} + preprocess: ) +[gpub005:0/64] 2023-07-08 14:32:39,537 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10, +[gpub005:0/64] 2023-07-08 14:32:39,543 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub005:0/64] 2023-07-08 14:38:06,476 (trainer:732) INFO: 26epoch:train:8301-8400batch: iter_time=1.307, forward_time=0.146, loss_ctc=68.148, loss_att=52.608, acc=0.711, loss=57.270, backward_time=1.039, grad_norm=125.284, clip=100.000, loss_scale=3.022e+23, optim_step_time=0.183, optim0_lr0=7.094e-05, train_time=7.972 +[gpub005:0/64] 2023-07-08 14:40:23,154 (trainer:732) INFO: 26epoch:train:8401-8500batch: iter_time=1.212e-04, forward_time=0.147, loss_ctc=68.148, loss_att=52.855, acc=0.701, loss=57.443, backward_time=1.027, grad_norm=93.761, clip=100.000, loss_scale=3.022e+23, optim_step_time=0.184, optim0_lr0=7.093e-05, train_time=2.733 +[gpub005:0/64] 2023-07-08 14:42:39,471 (trainer:732) INFO: 26epoch:train:8501-8600batch: iter_time=1.113e-04, forward_time=0.145, loss_ctc=75.352, loss_att=59.373, acc=0.694, loss=64.167, backward_time=1.026, grad_norm=104.938, clip=100.000, loss_scale=3.022e+23, optim_step_time=0.183, optim0_lr0=7.092e-05, train_time=2.726 +[gpub005:0/64] 2023-07-08 14:44:55,183 (trainer:732) INFO: 26epoch:train:8601-8700batch: iter_time=1.259e-04, forward_time=0.146, loss_ctc=71.071, loss_att=53.446, acc=0.700, loss=58.734, backward_time=1.026, grad_norm=98.544, clip=100.000, loss_scale=3.022e+23, optim_step_time=0.183, optim0_lr0=7.090e-05, train_time=2.714 +[gpub005:0/64] 2023-07-08 14:47:10,603 (trainer:732) INFO: 26epoch:train:8701-8800batch: iter_time=1.307e-04, forward_time=0.146, loss_ctc=73.000, loss_att=54.481, acc=0.691, loss=60.037, backward_time=1.026, grad_norm=114.647, clip=100.000, loss_scale=3.022e+23, optim_step_time=0.183, optim0_lr0=7.089e-05, train_time=2.708 +[gpub005:0/64] 2023-07-08 14:49:26,242 (trainer:732) INFO: 26epoch:train:8801-8900batch: iter_time=1.301e-04, forward_time=0.146, loss_ctc=76.952, loss_att=59.376, acc=0.693, loss=64.649, backward_time=1.028, grad_norm=98.987, clip=100.000, loss_scale=3.022e+23, optim_step_time=0.183, optim0_lr0=7.087e-05, train_time=2.713 +[gpub005:0/64] 2023-07-08 14:51:41,715 (trainer:732) INFO: 26epoch:train:8901-9000batch: iter_time=1.081e-04, forward_time=0.146, loss_ctc=78.083, loss_att=61.980, acc=0.678, loss=66.811, backward_time=1.026, grad_norm=108.739, clip=100.000, loss_scale=3.022e+23, optim_step_time=0.183, optim0_lr0=7.086e-05, train_time=2.709 +[gpub005:0/64] 2023-07-08 14:53:57,229 (trainer:732) INFO: 26epoch:train:9001-9100batch: iter_time=1.248e-04, forward_time=0.146, loss_ctc=70.526, 
loss_att=52.858, acc=0.705, loss=58.158, backward_time=1.026, grad_norm=99.831, clip=100.000, loss_scale=3.022e+23, optim_step_time=0.183, optim0_lr0=7.084e-05, train_time=2.710 +[gpub005:0/64] 2023-07-08 14:55:29,936 (multiple_iter_factory:32) INFO: Building 11th iter-factory... +[gpub005:0/64] 2023-07-08 14:55:48,203 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub005:0/64] 2023-07-08 14:55:51,676 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"} + preprocess: ) +[gpub005:0/64] 2023-07-08 14:55:51,676 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4, +[gpub005:0/64] 2023-07-08 14:55:51,682 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub005:0/64] 2023-07-08 14:59:11,906 (trainer:732) INFO: 26epoch:train:9101-9200batch: iter_time=1.673, forward_time=0.148, loss_ctc=69.871, loss_att=52.356, acc=0.707, loss=57.611, backward_time=1.042, grad_norm=126.560, clip=100.000, loss_scale=3.022e+23, optim_step_time=0.183, optim0_lr0=7.083e-05, train_time=6.293 +[gpub005:0/64] 2023-07-08 15:01:28,565 (trainer:732) INFO: 26epoch:train:9201-9300batch: iter_time=1.172e-04, forward_time=0.146, loss_ctc=71.189, loss_att=51.918, acc=0.707, loss=57.699, backward_time=1.029, grad_norm=123.852, clip=100.000, loss_scale=3.022e+23, optim_step_time=0.183, optim0_lr0=7.082e-05, train_time=2.733 +[gpub005:0/64] 2023-07-08 15:03:47,418 (trainer:732) INFO: 26epoch:train:9301-9400batch: iter_time=1.201e-04, forward_time=0.148, loss_ctc=67.872, loss_att=55.829, acc=0.707, loss=59.442, backward_time=1.033, grad_norm=91.349, clip=100.000, loss_scale=3.022e+23, optim_step_time=0.184, optim0_lr0=7.080e-05, train_time=2.777 +[gpub005:0/64] 2023-07-08 15:06:06,242 (trainer:732) INFO: 26epoch:train:9401-9500batch: iter_time=1.205e-04, forward_time=0.147, loss_ctc=78.903, loss_att=56.094, acc=0.705, loss=62.937, backward_time=1.030, grad_norm=96.669, clip=100.000, loss_scale=3.022e+23, optim_step_time=0.183, optim0_lr0=7.079e-05, train_time=2.776 +[gpub005:0/64] 2023-07-08 15:08:22,234 (trainer:732) INFO: 26epoch:train:9501-9600batch: iter_time=1.098e-04, forward_time=0.147, loss_ctc=71.778, loss_att=52.472, acc=0.708, loss=58.264, backward_time=1.028, grad_norm=100.964, clip=100.000, loss_scale=3.022e+23, optim_step_time=0.183, optim0_lr0=7.077e-05, train_time=2.720 +[gpub005:0/64] 2023-07-08 15:10:42,121 (trainer:732) INFO: 26epoch:train:9601-9700batch: iter_time=1.164e-04, forward_time=0.147, loss_ctc=69.666, loss_att=56.280, acc=0.686, loss=60.296, backward_time=1.041, grad_norm=97.150, clip=100.000, loss_scale=3.022e+23, optim_step_time=0.183, optim0_lr0=7.076e-05, train_time=2.798 +[gpub005:0/64] 2023-07-08 15:13:00,926 (trainer:732) INFO: 26epoch:train:9701-9800batch: iter_time=1.106e-04, forward_time=0.147, loss_ctc=77.484, loss_att=59.398, acc=0.680, loss=64.824, backward_time=1.034, grad_norm=104.554, clip=100.000, loss_scale=3.022e+23, optim_step_time=0.183, optim0_lr0=7.075e-05, train_time=2.776 +[gpub005:0/64] 2023-07-08 
15:15:25,022 (trainer:732) INFO: 26epoch:train:9801-9900batch: iter_time=1.269e-04, forward_time=0.157, loss_ctc=69.791, loss_att=54.546, acc=0.702, loss=59.120, backward_time=1.041, grad_norm=91.940, clip=100.000, loss_scale=3.022e+23, optim_step_time=0.186, optim0_lr0=7.073e-05, train_time=2.882 +[gpub005:0/64] 2023-07-08 15:17:40,647 (trainer:732) INFO: 26epoch:train:9901-10000batch: iter_time=1.157e-04, forward_time=0.146, loss_ctc=67.648, loss_att=52.440, acc=0.704, loss=57.002, backward_time=1.025, grad_norm=92.639, clip=100.000, loss_scale=3.022e+23, optim_step_time=0.183, optim0_lr0=7.072e-05, train_time=2.712 +[gpub005:0/64] 2023-07-08 15:29:42,436 (trainer:338) INFO: 26epoch results: [train] iter_time=0.204, forward_time=0.147, loss_ctc=72.927, loss_att=55.820, acc=0.700, loss=60.952, backward_time=1.031, grad_norm=104.257, clip=100.000, loss_scale=1.511e+23, optim_step_time=0.183, optim0_lr0=7.143e-05, train_time=3.302, time=4 hours, 35 minutes and 23.69 seconds, total_count=230000, gpu_max_cached_mem_GB=38.234, [valid] loss_ctc=48.054, cer_ctc=0.276, loss_att=40.224, acc=0.655, cer=0.436, wer=0.999, loss=42.573, time=5 minutes and 54.42 seconds, total_count=23782, gpu_max_cached_mem_GB=38.234, [att_plot] time=5 minutes and 57.87 seconds, total_count=0, gpu_max_cached_mem_GB=38.234 +[gpub005:0/64] 2023-07-08 15:29:57,930 (trainer:386) INFO: The best model has been updated: valid.total_count +[gpub005:0/64] 2023-07-08 15:29:57,939 (trainer:440) INFO: The model files were removed: exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/21epoch.pth +[gpub005:0/64] 2023-07-08 15:29:57,980 (trainer:272) INFO: 27/30epoch started. Estimated time to finish: 19 hours, 20 minutes and 32.41 seconds +[gpub005:0/64] 2023-07-08 15:29:58,046 (multiple_iter_factory:32) INFO: Building 0th iter-factory... 
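A note on reading the epoch summaries: the "(trainer:338) INFO: NNepoch results:" lines above carry both the averaged [train] metrics and the held-out [valid] metrics for the epoch. Below is a minimal parsing sketch written against the exact format shown in this log; the regex is an assumption about this log's layout only, not an ESPnet utility.

# Sketch: extract per-epoch validation metrics from "NNepoch results:" lines
# as they appear in this log (e.g. epoch 26: acc=0.655, cer=0.436, wer=0.999).
import re

VALID_RE = re.compile(
    r"(\d+)epoch results:.*?\[valid\].*?acc=([\d.]+).*?\bcer=([\d.]+), wer=([\d.]+)"
)

def valid_summary(line: str):
    """Return (epoch, acc, cer, wer) for an epoch-results line, else None."""
    m = VALID_RE.search(line)
    if m is None:
        return None
    epoch, acc, cer, wer = m.groups()
    return int(epoch), float(acc), float(cer), float(wer)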
+[gpub005:0/64] 2023-07-08 15:30:15,652 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub005:0/64] 2023-07-08 15:30:18,988 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"} + preprocess: ) +[gpub005:0/64] 2023-07-08 15:30:18,988 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11, +[gpub005:0/64] 2023-07-08 15:30:18,994 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub005:0/64] 2023-07-08 15:35:04,256 (trainer:732) INFO: 27epoch:train:1-100batch: iter_time=1.647, forward_time=0.168, loss_ctc=78.316, loss_att=58.961, acc=0.688, loss=64.768, backward_time=1.042, grad_norm=106.995, clip=100.000, loss_scale=3.022e+23, optim_step_time=0.185, optim0_lr0=7.070e-05, train_time=6.125 +[gpub005:0/64] 2023-07-08 15:37:21,077 (trainer:732) INFO: 27epoch:train:101-200batch: iter_time=1.137e-04, forward_time=0.146, loss_ctc=65.781, loss_att=51.624, acc=0.711, loss=55.871, backward_time=1.029, grad_norm=94.992, clip=100.000, loss_scale=3.022e+23, optim_step_time=0.183, optim0_lr0=7.069e-05, train_time=2.736 +[gpub005:0/64] 2023-07-08 15:39:39,144 (trainer:732) INFO: 27epoch:train:201-300batch: iter_time=1.198e-04, forward_time=0.145, loss_ctc=68.849, loss_att=52.320, acc=0.684, loss=57.279, backward_time=1.027, grad_norm=92.602, clip=100.000, loss_scale=3.022e+23, optim_step_time=0.183, optim0_lr0=7.067e-05, train_time=2.761 +[gpub005:0/64] 2023-07-08 15:41:55,639 (trainer:732) INFO: 27epoch:train:301-400batch: iter_time=1.117e-04, forward_time=0.147, loss_ctc=89.419, loss_att=68.861, acc=0.699, loss=75.029, backward_time=1.031, grad_norm=131.850, clip=100.000, loss_scale=3.022e+23, optim_step_time=0.183, optim0_lr0=7.066e-05, train_time=2.730 +[gpub005:0/64] 2023-07-08 15:44:12,562 (trainer:732) INFO: 27epoch:train:401-500batch: iter_time=1.123e-04, forward_time=0.146, loss_ctc=71.286, loss_att=59.711, acc=0.692, loss=63.183, backward_time=1.032, grad_norm=96.921, clip=100.000, loss_scale=3.022e+23, optim_step_time=0.183, optim0_lr0=7.065e-05, train_time=2.738 +[gpub005:0/64] 2023-07-08 15:46:35,966 (trainer:732) INFO: 27epoch:train:501-600batch: iter_time=1.069e-04, forward_time=0.144, loss_ctc=63.026, loss_att=47.249, acc=0.702, loss=51.982, backward_time=1.050, grad_norm=101.613, clip=100.000, loss_scale=3.022e+23, optim_step_time=0.183, optim0_lr0=7.063e-05, train_time=2.868 +[gpub005:0/64] 2023-07-08 15:48:57,030 (trainer:732) INFO: 27epoch:train:601-700batch: iter_time=1.063e-04, forward_time=0.146, loss_ctc=78.906, loss_att=55.763, acc=0.690, loss=62.706, backward_time=1.033, grad_norm=105.517, clip=100.000, loss_scale=3.022e+23, optim_step_time=0.183, optim0_lr0=7.062e-05, train_time=2.821 +[gpub005:0/64] 2023-07-08 15:51:15,464 (trainer:732) INFO: 27epoch:train:701-800batch: iter_time=1.760e-04, forward_time=0.160, loss_ctc=71.619, loss_att=57.821, acc=0.699, loss=61.960, backward_time=1.029, grad_norm=99.845, clip=100.000, loss_scale=3.022e+23, optim_step_time=0.184, optim0_lr0=7.060e-05, 
train_time=2.768 +[gpub005:0/64] 2023-07-08 15:52:10,389 (multiple_iter_factory:32) INFO: Building 1th iter-factory... +[gpub005:0/64] 2023-07-08 15:52:28,184 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub005:0/64] 2023-07-08 15:52:31,555 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"} + preprocess: ) +[gpub005:0/64] 2023-07-08 15:52:31,555 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4, +[gpub005:0/64] 2023-07-08 15:52:31,561 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub005:0/64] 2023-07-08 15:56:21,957 (trainer:732) INFO: 27epoch:train:801-900batch: iter_time=1.505, forward_time=0.162, loss_ctc=78.800, loss_att=57.410, acc=0.692, loss=63.827, backward_time=1.046, grad_norm=114.718, clip=100.000, loss_scale=3.022e+23, optim_step_time=0.184, optim0_lr0=7.059e-05, train_time=6.130 +[gpub005:0/64] 2023-07-08 15:58:38,204 (trainer:732) INFO: 27epoch:train:901-1000batch: iter_time=1.265e-04, forward_time=0.145, loss_ctc=70.277, loss_att=51.641, acc=0.699, loss=57.232, backward_time=1.026, grad_norm=108.240, clip=100.000, loss_scale=3.022e+23, optim_step_time=0.183, optim0_lr0=7.058e-05, train_time=2.725 +[gpub005:0/64] 2023-07-08 16:00:54,182 (trainer:732) INFO: 27epoch:train:1001-1100batch: iter_time=1.284e-04, forward_time=0.146, loss_ctc=65.909, loss_att=52.213, acc=0.692, loss=56.322, backward_time=1.028, grad_norm=104.226, clip=100.000, loss_scale=3.022e+23, optim_step_time=0.183, optim0_lr0=7.056e-05, train_time=2.719 +[gpub005:0/64] 2023-07-08 16:03:13,716 (trainer:732) INFO: 27epoch:train:1101-1200batch: iter_time=1.213e-04, forward_time=0.147, loss_ctc=75.613, loss_att=59.388, acc=0.694, loss=64.256, backward_time=1.033, grad_norm=118.002, clip=100.000, loss_scale=3.022e+23, optim_step_time=0.183, optim0_lr0=7.055e-05, train_time=2.790 +[gpub005:0/64] 2023-07-08 16:05:29,735 (trainer:732) INFO: 27epoch:train:1201-1300batch: iter_time=1.145e-04, forward_time=0.147, loss_ctc=81.972, loss_att=64.720, acc=0.683, loss=69.896, backward_time=1.029, grad_norm=113.873, clip=100.000, loss_scale=3.022e+23, optim_step_time=0.183, optim0_lr0=7.053e-05, train_time=2.720 +[gpub005:0/64] 2023-07-08 16:07:45,823 (trainer:732) INFO: 27epoch:train:1301-1400batch: iter_time=1.129e-04, forward_time=0.147, loss_ctc=66.096, loss_att=57.013, acc=0.672, loss=59.738, backward_time=1.028, grad_norm=94.201, clip=100.000, loss_scale=3.022e+23, optim_step_time=0.183, optim0_lr0=7.052e-05, train_time=2.722 +[gpub005:0/64] 2023-07-08 16:10:01,331 (trainer:732) INFO: 27epoch:train:1401-1500batch: iter_time=1.266e-04, forward_time=0.146, loss_ctc=77.033, loss_att=53.359, acc=0.689, loss=60.461, backward_time=1.026, grad_norm=105.645, clip=100.000, loss_scale=3.022e+23, optim_step_time=0.183, optim0_lr0=7.051e-05, train_time=2.710 +[gpub005:0/64] 2023-07-08 16:12:16,550 (trainer:732) INFO: 27epoch:train:1501-1600batch: iter_time=1.348e-04, forward_time=0.145, loss_ctc=64.493, loss_att=49.540, acc=0.689, loss=54.026, 
backward_time=1.024, grad_norm=104.991, clip=100.000, loss_scale=3.022e+23, optim_step_time=0.183, optim0_lr0=7.049e-05, train_time=2.704 +[gpub005:0/64] 2023-07-08 16:13:52,617 (multiple_iter_factory:32) INFO: Building 2th iter-factory... +[gpub005:0/64] 2023-07-08 16:14:10,903 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub005:0/64] 2023-07-08 16:14:14,392 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"} + preprocess: ) +[gpub005:0/64] 2023-07-08 16:14:14,392 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9, +[gpub005:0/64] 2023-07-08 16:14:14,398 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub005:0/64] 2023-07-08 16:17:12,466 (trainer:732) INFO: 27epoch:train:1601-1700batch: iter_time=1.393, forward_time=0.146, loss_ctc=72.251, loss_att=57.571, acc=0.694, loss=61.975, backward_time=1.037, grad_norm=118.951, clip=100.000, loss_scale=3.022e+23, optim_step_time=0.183, optim0_lr0=7.048e-05, train_time=5.918 +[gpub005:0/64] 2023-07-08 16:19:29,351 (trainer:732) INFO: 27epoch:train:1701-1800batch: iter_time=1.086e-04, forward_time=0.147, loss_ctc=72.839, loss_att=54.944, acc=0.702, loss=60.312, backward_time=1.033, grad_norm=123.622, clip=100.000, loss_scale=3.022e+23, optim_step_time=0.183, optim0_lr0=7.046e-05, train_time=2.737 +[gpub005:0/64] 2023-07-08 16:21:45,108 (trainer:732) INFO: 27epoch:train:1801-1900batch: iter_time=1.087e-04, forward_time=0.145, loss_ctc=61.753, loss_att=49.031, acc=0.707, loss=52.847, backward_time=1.027, grad_norm=99.456, clip=100.000, loss_scale=3.022e+23, optim_step_time=0.183, optim0_lr0=7.045e-05, train_time=2.715 +[gpub005:0/64] 2023-07-08 16:24:00,983 (trainer:732) INFO: 27epoch:train:1901-2000batch: iter_time=1.038e-04, forward_time=0.145, loss_ctc=79.566, loss_att=60.500, acc=0.699, loss=66.220, backward_time=1.028, grad_norm=103.999, clip=100.000, loss_scale=3.022e+23, optim_step_time=0.183, optim0_lr0=7.044e-05, train_time=2.717 +[gpub005:0/64] 2023-07-08 16:26:17,002 (trainer:732) INFO: 27epoch:train:2001-2100batch: iter_time=1.062e-04, forward_time=0.146, loss_ctc=79.269, loss_att=61.205, acc=0.700, loss=66.624, backward_time=1.029, grad_norm=134.425, clip=100.000, loss_scale=6.045e+23, optim_step_time=0.183, optim0_lr0=7.042e-05, train_time=2.720 +[gpub005:0/64] 2023-07-08 16:28:33,016 (trainer:732) INFO: 27epoch:train:2101-2200batch: iter_time=1.090e-04, forward_time=0.146, loss_ctc=68.277, loss_att=59.362, acc=0.687, loss=62.036, backward_time=1.029, grad_norm=98.602, clip=100.000, loss_scale=6.045e+23, optim_step_time=0.183, optim0_lr0=7.041e-05, train_time=2.720 +[gpub005:0/64] 2023-07-08 16:30:48,891 (trainer:732) INFO: 27epoch:train:2201-2300batch: iter_time=1.125e-04, forward_time=0.146, loss_ctc=75.472, loss_att=52.615, acc=0.695, loss=59.472, backward_time=1.028, grad_norm=107.785, clip=100.000, loss_scale=6.045e+23, optim_step_time=0.183, optim0_lr0=7.039e-05, train_time=2.717 +[gpub005:0/64] 2023-07-08 16:33:04,669 (trainer:732) INFO: 
27epoch:train:2301-2400batch: iter_time=1.164e-04, forward_time=0.146, loss_ctc=65.943, loss_att=50.458, acc=0.702, loss=55.104, backward_time=1.027, grad_norm=108.478, clip=100.000, loss_scale=6.045e+23, optim_step_time=0.183, optim0_lr0=7.038e-05, train_time=2.715 +[gpub005:0/64] 2023-07-08 16:35:20,434 (trainer:732) INFO: 27epoch:train:2401-2500batch: iter_time=1.090e-04, forward_time=0.146, loss_ctc=73.144, loss_att=58.864, acc=0.697, loss=63.148, backward_time=1.027, grad_norm=94.710, clip=100.000, loss_scale=6.045e+23, optim_step_time=0.183, optim0_lr0=7.037e-05, train_time=2.715 +[gpub005:0/64] 2023-07-08 16:35:23,306 (multiple_iter_factory:32) INFO: Building 3th iter-factory... +[gpub005:0/64] 2023-07-08 16:35:41,386 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub005:0/64] 2023-07-08 16:35:44,939 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"} + preprocess: ) +[gpub005:0/64] 2023-07-08 16:35:44,939 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7, +[gpub005:0/64] 2023-07-08 16:35:44,946 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub005:0/64] 2023-07-08 16:41:04,598 (trainer:732) INFO: 27epoch:train:2501-2600batch: iter_time=1.263, forward_time=0.173, loss_ctc=80.201, loss_att=57.657, acc=0.706, loss=64.420, backward_time=1.041, grad_norm=123.403, clip=100.000, loss_scale=6.045e+23, optim_step_time=0.184, optim0_lr0=7.035e-05, train_time=6.882 +[gpub005:0/64] 2023-07-08 16:43:21,200 (trainer:732) INFO: 27epoch:train:2601-2700batch: iter_time=1.166e-04, forward_time=0.148, loss_ctc=63.267, loss_att=46.459, acc=0.706, loss=51.501, backward_time=1.029, grad_norm=87.589, clip=100.000, loss_scale=6.045e+23, optim_step_time=0.184, optim0_lr0=7.034e-05, train_time=2.732 +[gpub005:0/64] 2023-07-08 16:45:37,296 (trainer:732) INFO: 27epoch:train:2701-2800batch: iter_time=1.226e-04, forward_time=0.148, loss_ctc=68.250, loss_att=52.710, acc=0.702, loss=57.372, backward_time=1.030, grad_norm=105.430, clip=100.000, loss_scale=6.045e+23, optim_step_time=0.184, optim0_lr0=7.032e-05, train_time=2.722 +[gpub005:0/64] 2023-07-08 16:47:53,627 (trainer:732) INFO: 27epoch:train:2801-2900batch: iter_time=1.205e-04, forward_time=0.148, loss_ctc=82.041, loss_att=65.147, acc=0.699, loss=70.215, backward_time=1.032, grad_norm=109.986, clip=100.000, loss_scale=6.045e+23, optim_step_time=0.183, optim0_lr0=7.031e-05, train_time=2.726 +[gpub005:0/64] 2023-07-08 16:50:09,459 (trainer:732) INFO: 27epoch:train:2901-3000batch: iter_time=1.224e-04, forward_time=0.148, loss_ctc=73.529, loss_att=60.131, acc=0.703, loss=64.150, backward_time=1.028, grad_norm=96.408, clip=100.000, loss_scale=6.045e+23, optim_step_time=0.183, optim0_lr0=7.030e-05, train_time=2.716 +[gpub005:0/64] 2023-07-08 16:52:25,206 (trainer:732) INFO: 27epoch:train:3001-3100batch: iter_time=1.059e-04, forward_time=0.147, loss_ctc=60.358, loss_att=49.299, acc=0.695, loss=52.616, backward_time=1.028, grad_norm=102.512, clip=100.000, loss_scale=6.045e+23, 
optim_step_time=0.183, optim0_lr0=7.028e-05, train_time=2.715 +[gpub005:0/64] 2023-07-08 16:54:41,021 (trainer:732) INFO: 27epoch:train:3101-3200batch: iter_time=1.048e-04, forward_time=0.148, loss_ctc=78.368, loss_att=54.425, acc=0.688, loss=61.608, backward_time=1.028, grad_norm=94.380, clip=100.000, loss_scale=6.045e+23, optim_step_time=0.183, optim0_lr0=7.027e-05, train_time=2.716 +[gpub005:0/64] 2023-07-08 16:56:56,755 (trainer:732) INFO: 27epoch:train:3201-3300batch: iter_time=1.043e-04, forward_time=0.147, loss_ctc=68.893, loss_att=56.211, acc=0.703, loss=60.016, backward_time=1.027, grad_norm=106.588, clip=100.000, loss_scale=6.045e+23, optim_step_time=0.183, optim0_lr0=7.026e-05, train_time=2.714 +[gpub005:0/64] 2023-07-08 16:57:52,426 (multiple_iter_factory:32) INFO: Building 4th iter-factory... +[gpub005:0/64] 2023-07-08 16:58:10,903 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub005:0/64] 2023-07-08 16:58:14,326 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"} + preprocess: ) +[gpub005:0/64] 2023-07-08 16:58:14,326 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8, +[gpub005:0/64] 2023-07-08 16:58:14,332 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub005:0/64] 2023-07-08 17:03:42,753 (trainer:732) INFO: 27epoch:train:3301-3400batch: iter_time=1.708, forward_time=0.146, loss_ctc=79.437, loss_att=60.997, acc=0.686, loss=66.529, backward_time=1.042, grad_norm=148.834, clip=100.000, loss_scale=6.045e+23, optim_step_time=0.183, optim0_lr0=7.024e-05, train_time=8.120 +[gpub005:0/64] 2023-07-08 17:05:58,787 (trainer:732) INFO: 27epoch:train:3401-3500batch: iter_time=1.422e-04, forward_time=0.145, loss_ctc=63.091, loss_att=46.527, acc=0.710, loss=51.497, backward_time=1.026, grad_norm=97.048, clip=100.000, loss_scale=6.045e+23, optim_step_time=0.183, optim0_lr0=7.023e-05, train_time=2.720 +[gpub005:0/64] 2023-07-08 17:08:14,662 (trainer:732) INFO: 27epoch:train:3501-3600batch: iter_time=1.409e-04, forward_time=0.146, loss_ctc=65.612, loss_att=50.551, acc=0.702, loss=55.069, backward_time=1.028, grad_norm=90.485, clip=100.000, loss_scale=6.045e+23, optim_step_time=0.183, optim0_lr0=7.021e-05, train_time=2.717 +[gpub005:0/64] 2023-07-08 17:10:30,437 (trainer:732) INFO: 27epoch:train:3601-3700batch: iter_time=1.452e-04, forward_time=0.145, loss_ctc=86.543, loss_att=66.397, acc=0.689, loss=72.441, backward_time=1.027, grad_norm=123.487, clip=100.000, loss_scale=6.045e+23, optim_step_time=0.183, optim0_lr0=7.020e-05, train_time=2.715 +[gpub005:0/64] 2023-07-08 17:12:46,222 (trainer:732) INFO: 27epoch:train:3701-3800batch: iter_time=1.172e-04, forward_time=0.146, loss_ctc=72.226, loss_att=59.576, acc=0.683, loss=63.371, backward_time=1.029, grad_norm=110.341, clip=100.000, loss_scale=6.045e+23, optim_step_time=0.183, optim0_lr0=7.019e-05, train_time=2.715 +[gpub005:0/64] 2023-07-08 17:15:01,954 (trainer:732) INFO: 27epoch:train:3801-3900batch: iter_time=1.262e-04, forward_time=0.147, 
loss_ctc=62.765, loss_att=50.118, acc=0.692, loss=53.912, backward_time=1.026, grad_norm=108.059, clip=100.000, loss_scale=6.045e+23, optim_step_time=0.183, optim0_lr0=7.017e-05, train_time=2.714 +[gpub005:0/64] 2023-07-08 17:17:17,653 (trainer:732) INFO: 27epoch:train:3901-4000batch: iter_time=1.348e-04, forward_time=0.146, loss_ctc=79.044, loss_att=57.038, acc=0.681, loss=63.640, backward_time=1.027, grad_norm=113.104, clip=100.000, loss_scale=6.045e+23, optim_step_time=0.183, optim0_lr0=7.016e-05, train_time=2.714 +[gpub005:0/64] 2023-07-08 17:19:33,486 (trainer:732) INFO: 27epoch:train:4001-4100batch: iter_time=1.284e-04, forward_time=0.146, loss_ctc=67.698, loss_att=54.565, acc=0.698, loss=58.505, backward_time=1.027, grad_norm=87.633, clip=100.000, loss_scale=6.045e+23, optim_step_time=0.183, optim0_lr0=7.014e-05, train_time=2.716 +[gpub005:0/64] 2023-07-08 17:21:07,242 (multiple_iter_factory:32) INFO: Building 5th iter-factory... +[gpub005:0/64] 2023-07-08 17:21:25,710 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub005:0/64] 2023-07-08 17:21:29,182 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"} + preprocess: ) +[gpub005:0/64] 2023-07-08 17:21:29,182 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6, +[gpub005:0/64] 2023-07-08 17:21:29,188 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub005:0/64] 2023-07-08 17:25:07,290 (trainer:732) INFO: 27epoch:train:4101-4200batch: iter_time=1.312, forward_time=0.147, loss_ctc=74.376, loss_att=54.791, acc=0.701, loss=60.666, backward_time=1.035, grad_norm=132.213, clip=100.000, loss_scale=6.045e+23, optim_step_time=0.183, optim0_lr0=7.013e-05, train_time=6.676 +[gpub005:0/64] 2023-07-08 17:27:23,791 (trainer:732) INFO: 27epoch:train:4201-4300batch: iter_time=1.339e-04, forward_time=0.147, loss_ctc=70.773, loss_att=52.734, acc=0.694, loss=58.146, backward_time=1.029, grad_norm=107.084, clip=100.000, loss_scale=6.045e+23, optim_step_time=0.183, optim0_lr0=7.012e-05, train_time=2.730 +[gpub005:0/64] 2023-07-08 17:29:39,208 (trainer:732) INFO: 27epoch:train:4301-4400batch: iter_time=1.474e-04, forward_time=0.146, loss_ctc=64.956, loss_att=51.106, acc=0.710, loss=55.261, backward_time=1.025, grad_norm=85.824, clip=100.000, loss_scale=6.045e+23, optim_step_time=0.183, optim0_lr0=7.010e-05, train_time=2.708 +[gpub005:0/64] 2023-07-08 17:31:59,903 (trainer:732) INFO: 27epoch:train:4401-4500batch: iter_time=1.395e-04, forward_time=0.147, loss_ctc=68.008, loss_att=54.043, acc=0.690, loss=58.233, backward_time=1.031, grad_norm=87.203, clip=100.000, loss_scale=6.045e+23, optim_step_time=0.183, optim0_lr0=7.009e-05, train_time=2.814 +[gpub005:0/64] 2023-07-08 17:34:15,707 (trainer:732) INFO: 27epoch:train:4501-4600batch: iter_time=1.354e-04, forward_time=0.147, loss_ctc=83.914, loss_att=63.789, acc=0.692, loss=69.826, backward_time=1.027, grad_norm=114.171, clip=100.000, loss_scale=6.045e+23, optim_step_time=0.183, optim0_lr0=7.008e-05, train_time=2.716 
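Each "(trainer:732)" entry above reports one 100-batch window as comma-separated key=value pairs. A small sketch for turning one such entry into a dict of floats, assuming only the format visible in this log (this is not an ESPnet API):

# Sketch: parse one "(trainer:732)" progress entry from this log into
# (epoch, first_batch, last_batch, {metric: value}).
import re

STEP_RE = re.compile(r"(\d+)epoch:train:(\d+)-(\d+)batch: (.*)")
KV_RE = re.compile(r"(\w+)=([0-9.eE+-]+)")

def parse_progress(line: str):
    m = STEP_RE.search(line)
    if m is None:
        return None
    epoch, first, last, rest = m.groups()
    metrics = {k: float(v) for k, v in KV_RE.findall(rest)}
    return int(epoch), int(first), int(last), metrics

# Example with a truncated entry from this log:
print(parse_progress("27epoch:train:4801-4900batch: iter_time=1.199e-04, loss_ctc=74.797, acc=0.688"))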
+[gpub005:0/64] 2023-07-08 17:36:31,380 (trainer:732) INFO: 27epoch:train:4601-4700batch: iter_time=1.455e-04, forward_time=0.148, loss_ctc=69.598, loss_att=58.292, acc=0.684, loss=61.684, backward_time=1.026, grad_norm=93.514, clip=100.000, loss_scale=6.045e+23, optim_step_time=0.183, optim0_lr0=7.006e-05, train_time=2.713 +[gpub005:0/64] 2023-07-08 17:38:47,070 (trainer:732) INFO: 27epoch:train:4701-4800batch: iter_time=1.468e-04, forward_time=0.148, loss_ctc=62.231, loss_att=46.924, acc=0.696, loss=51.516, backward_time=1.027, grad_norm=93.136, clip=100.000, loss_scale=6.045e+23, optim_step_time=0.183, optim0_lr0=7.005e-05, train_time=2.714 +[gpub005:0/64] 2023-07-08 17:41:07,645 (trainer:732) INFO: 27epoch:train:4801-4900batch: iter_time=1.199e-04, forward_time=0.146, loss_ctc=74.797, loss_att=53.529, acc=0.688, loss=59.909, backward_time=1.028, grad_norm=106.798, clip=100.000, loss_scale=6.045e+23, optim_step_time=0.183, optim0_lr0=7.003e-05, train_time=2.811 +[gpub005:0/64] 2023-07-08 17:43:23,525 (trainer:732) INFO: 27epoch:train:4901-5000batch: iter_time=9.656e-05, forward_time=0.145, loss_ctc=69.547, loss_att=55.357, acc=0.699, loss=59.614, backward_time=1.027, grad_norm=88.177, clip=100.000, loss_scale=6.045e+23, optim_step_time=0.183, optim0_lr0=7.002e-05, train_time=2.717 +[gpub005:0/64] 2023-07-08 17:43:26,168 (multiple_iter_factory:32) INFO: Building 6th iter-factory... +[gpub005:0/64] 2023-07-08 17:43:44,478 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub005:0/64] 2023-07-08 17:43:47,904 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"} + preprocess: ) +[gpub005:0/64] 2023-07-08 17:43:47,905 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0, +[gpub005:0/64] 2023-07-08 17:43:47,911 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub005:0/64] 2023-07-08 17:48:43,183 (trainer:732) INFO: 27epoch:train:5001-5100batch: iter_time=1.300, forward_time=0.156, loss_ctc=81.007, loss_att=59.315, acc=0.698, loss=65.823, backward_time=1.042, grad_norm=126.574, clip=100.000, loss_scale=6.045e+23, optim_step_time=0.183, optim0_lr0=7.001e-05, train_time=6.393 +[gpub005:0/64] 2023-07-08 17:50:59,355 (trainer:732) INFO: 27epoch:train:5101-5200batch: iter_time=1.186e-04, forward_time=0.146, loss_ctc=63.117, loss_att=45.507, acc=0.712, loss=50.790, backward_time=1.029, grad_norm=105.642, clip=100.000, loss_scale=6.045e+23, optim_step_time=0.183, optim0_lr0=6.999e-05, train_time=2.723 +[gpub005:0/64] 2023-07-08 17:53:15,220 (trainer:732) INFO: 27epoch:train:5201-5300batch: iter_time=1.242e-04, forward_time=0.146, loss_ctc=66.674, loss_att=51.779, acc=0.705, loss=56.248, backward_time=1.028, grad_norm=93.017, clip=100.000, loss_scale=6.045e+23, optim_step_time=0.183, optim0_lr0=6.998e-05, train_time=2.717 +[gpub005:0/64] 2023-07-08 17:55:31,355 (trainer:732) INFO: 27epoch:train:5301-5400batch: iter_time=1.277e-04, forward_time=0.145, loss_ctc=80.758, loss_att=65.051, acc=0.690, loss=69.763, 
backward_time=1.028, grad_norm=115.695, clip=100.000, loss_scale=6.045e+23, optim_step_time=0.183, optim0_lr0=6.997e-05, train_time=2.722 +[gpub005:0/64] 2023-07-08 17:57:46,927 (trainer:732) INFO: 27epoch:train:5401-5500batch: iter_time=1.098e-04, forward_time=0.144, loss_ctc=74.950, loss_att=60.692, acc=0.688, loss=64.970, backward_time=1.026, grad_norm=100.041, clip=100.000, loss_scale=6.045e+23, optim_step_time=0.183, optim0_lr0=6.995e-05, train_time=2.711 +[gpub005:0/64] 2023-07-08 18:00:02,552 (trainer:732) INFO: 27epoch:train:5501-5600batch: iter_time=1.159e-04, forward_time=0.146, loss_ctc=60.364, loss_att=48.962, acc=0.693, loss=52.383, backward_time=1.025, grad_norm=89.373, clip=100.000, loss_scale=6.045e+23, optim_step_time=0.183, optim0_lr0=6.994e-05, train_time=2.712 +[gpub005:0/64] 2023-07-08 18:02:18,130 (trainer:732) INFO: 27epoch:train:5601-5700batch: iter_time=1.266e-04, forward_time=0.145, loss_ctc=77.107, loss_att=53.744, acc=0.684, loss=60.753, backward_time=1.027, grad_norm=102.369, clip=100.000, loss_scale=6.045e+23, optim_step_time=0.183, optim0_lr0=6.992e-05, train_time=2.711 +[gpub005:0/64] 2023-07-08 18:04:33,692 (trainer:732) INFO: 27epoch:train:5701-5800batch: iter_time=1.235e-04, forward_time=0.146, loss_ctc=67.366, loss_att=53.893, acc=0.700, loss=57.935, backward_time=1.026, grad_norm=103.715, clip=100.000, loss_scale=6.045e+23, optim_step_time=0.183, optim0_lr0=6.991e-05, train_time=2.711 +[gpub005:0/64] 2023-07-08 18:05:27,389 (multiple_iter_factory:32) INFO: Building 7th iter-factory... +[gpub005:0/64] 2023-07-08 18:05:45,505 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub005:0/64] 2023-07-08 18:05:48,945 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"} + preprocess: ) +[gpub005:0/64] 2023-07-08 18:05:48,946 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5, +[gpub005:0/64] 2023-07-08 18:05:48,952 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub005:0/64] 2023-07-08 18:09:46,628 (trainer:732) INFO: 27epoch:train:5801-5900batch: iter_time=1.481, forward_time=0.206, loss_ctc=77.422, loss_att=54.274, acc=0.707, loss=61.219, backward_time=1.053, grad_norm=108.922, clip=100.000, loss_scale=6.045e+23, optim_step_time=0.187, optim0_lr0=6.990e-05, train_time=6.259 +[gpub005:0/64] 2023-07-08 18:12:03,207 (trainer:732) INFO: 27epoch:train:5901-6000batch: iter_time=1.133e-04, forward_time=0.147, loss_ctc=67.668, loss_att=49.007, acc=0.719, loss=54.605, backward_time=1.029, grad_norm=105.149, clip=100.000, loss_scale=6.045e+23, optim_step_time=0.183, optim0_lr0=6.988e-05, train_time=2.731 +[gpub005:0/64] 2023-07-08 18:14:18,983 (trainer:732) INFO: 27epoch:train:6001-6100batch: iter_time=1.019e-04, forward_time=0.146, loss_ctc=65.649, loss_att=52.644, acc=0.704, loss=56.546, backward_time=1.029, grad_norm=82.415, clip=100.000, loss_scale=1.209e+24, optim_step_time=0.183, optim0_lr0=6.987e-05, train_time=2.715 +[gpub005:0/64] 2023-07-08 18:16:35,266 (trainer:732) INFO: 
27epoch:train:6101-6200batch: iter_time=1.118e-04, forward_time=0.147, loss_ctc=74.745, loss_att=57.661, acc=0.710, loss=62.786, backward_time=1.032, grad_norm=103.839, clip=100.000, loss_scale=1.209e+24, optim_step_time=0.183, optim0_lr0=6.986e-05, train_time=2.725 +[gpub005:0/64] 2023-07-08 18:18:51,419 (trainer:732) INFO: 27epoch:train:6201-6300batch: iter_time=1.098e-04, forward_time=0.147, loss_ctc=80.242, loss_att=63.110, acc=0.705, loss=68.249, backward_time=1.031, grad_norm=123.740, clip=100.000, loss_scale=1.209e+24, optim_step_time=0.183, optim0_lr0=6.984e-05, train_time=2.723 +[gpub005:0/64] 2023-07-08 18:21:07,449 (trainer:732) INFO: 27epoch:train:6301-6400batch: iter_time=1.090e-04, forward_time=0.147, loss_ctc=64.917, loss_att=56.070, acc=0.689, loss=58.724, backward_time=1.029, grad_norm=94.389, clip=100.000, loss_scale=1.209e+24, optim_step_time=0.183, optim0_lr0=6.983e-05, train_time=2.720 +[gpub005:0/64] 2023-07-08 18:23:23,293 (trainer:732) INFO: 27epoch:train:6401-6500batch: iter_time=1.077e-04, forward_time=0.146, loss_ctc=75.776, loss_att=53.223, acc=0.701, loss=59.989, backward_time=1.026, grad_norm=128.056, clip=100.000, loss_scale=1.209e+24, optim_step_time=0.183, optim0_lr0=6.982e-05, train_time=2.717 +[gpub005:0/64] 2023-07-08 18:25:41,985 (trainer:732) INFO: 27epoch:train:6501-6600batch: iter_time=1.252e-04, forward_time=0.146, loss_ctc=63.831, loss_att=48.758, acc=0.705, loss=53.280, backward_time=1.030, grad_norm=111.720, clip=100.000, loss_scale=1.209e+24, optim_step_time=0.183, optim0_lr0=6.980e-05, train_time=2.774 +[gpub005:0/64] 2023-07-08 18:27:20,934 (multiple_iter_factory:32) INFO: Building 8th iter-factory... +[gpub005:0/64] 2023-07-08 18:27:39,116 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub005:0/64] 2023-07-08 18:27:42,618 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"} + preprocess: ) +[gpub005:0/64] 2023-07-08 18:27:42,618 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3, +[gpub005:0/64] 2023-07-08 18:27:42,625 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub005:0/64] 2023-07-08 18:31:54,283 (trainer:732) INFO: 27epoch:train:6601-6700batch: iter_time=1.661, forward_time=0.168, loss_ctc=78.094, loss_att=55.859, acc=0.715, loss=62.530, backward_time=1.038, grad_norm=110.280, clip=100.000, loss_scale=1.209e+24, optim_step_time=0.183, optim0_lr0=6.979e-05, train_time=7.445 +[gpub005:0/64] 2023-07-08 18:34:11,650 (trainer:732) INFO: 27epoch:train:6701-6800batch: iter_time=1.183e-04, forward_time=0.147, loss_ctc=71.392, loss_att=52.314, acc=0.710, loss=58.037, backward_time=1.032, grad_norm=109.454, clip=100.000, loss_scale=1.209e+24, optim_step_time=0.183, optim0_lr0=6.977e-05, train_time=2.748 +[gpub005:0/64] 2023-07-08 18:36:27,985 (trainer:732) INFO: 27epoch:train:6801-6900batch: iter_time=1.155e-04, forward_time=0.147, loss_ctc=64.101, loss_att=51.370, acc=0.716, loss=55.189, backward_time=1.029, grad_norm=109.963, clip=100.000, 
loss_scale=1.209e+24, optim_step_time=0.183, optim0_lr0=6.976e-05, train_time=2.726 +[gpub005:0/64] 2023-07-08 18:38:44,206 (trainer:732) INFO: 27epoch:train:6901-7000batch: iter_time=1.189e-04, forward_time=0.145, loss_ctc=68.462, loss_att=54.516, acc=0.697, loss=58.700, backward_time=1.029, grad_norm=85.796, clip=100.000, loss_scale=1.209e+24, optim_step_time=0.183, optim0_lr0=6.975e-05, train_time=2.724 +[gpub005:0/64] 2023-07-08 18:41:00,068 (trainer:732) INFO: 27epoch:train:7001-7100batch: iter_time=1.081e-04, forward_time=0.146, loss_ctc=84.776, loss_att=62.796, acc=0.708, loss=69.390, backward_time=1.027, grad_norm=105.049, clip=100.000, loss_scale=1.209e+24, optim_step_time=0.183, optim0_lr0=6.973e-05, train_time=2.717 +[gpub005:0/64] 2023-07-08 18:43:15,964 (trainer:732) INFO: 27epoch:train:7101-7200batch: iter_time=1.084e-04, forward_time=0.147, loss_ctc=70.156, loss_att=58.438, acc=0.703, loss=61.953, backward_time=1.028, grad_norm=91.886, clip=100.000, loss_scale=1.209e+24, optim_step_time=0.183, optim0_lr0=6.972e-05, train_time=2.718 +[gpub005:0/64] 2023-07-08 18:45:31,769 (trainer:732) INFO: 27epoch:train:7201-7300batch: iter_time=1.110e-04, forward_time=0.147, loss_ctc=62.426, loss_att=47.189, acc=0.712, loss=51.760, backward_time=1.027, grad_norm=99.793, clip=100.000, loss_scale=1.209e+24, optim_step_time=0.183, optim0_lr0=6.971e-05, train_time=2.716 +[gpub005:0/64] 2023-07-08 18:47:47,563 (trainer:732) INFO: 27epoch:train:7301-7400batch: iter_time=1.171e-04, forward_time=0.146, loss_ctc=74.215, loss_att=52.281, acc=0.700, loss=58.861, backward_time=1.029, grad_norm=103.237, clip=100.000, loss_scale=1.209e+24, optim_step_time=0.183, optim0_lr0=6.969e-05, train_time=2.716 +[gpub005:0/64] 2023-07-08 18:50:03,368 (trainer:732) INFO: 27epoch:train:7401-7500batch: iter_time=1.113e-04, forward_time=0.147, loss_ctc=69.100, loss_att=56.675, acc=0.701, loss=60.403, backward_time=1.028, grad_norm=93.451, clip=100.000, loss_scale=1.209e+24, optim_step_time=0.183, optim0_lr0=6.968e-05, train_time=2.716 +[gpub005:0/64] 2023-07-08 18:50:06,411 (multiple_iter_factory:32) INFO: Building 9th iter-factory... 
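Consecutive progress entries in this stretch are roughly 2 min 16 s apart per 100 batches (about 1.36 s per batch of 128). A sketch that recovers wall-clock throughput from two entry timestamps, assuming the "%Y-%m-%d %H:%M:%S,%f" stamp format shown in the log:

# Sketch: wall-clock seconds per batch between two consecutive progress
# entries, using the timestamp format seen in this log.
from datetime import datetime

FMT = "%Y-%m-%d %H:%M:%S,%f"

def seconds_per_batch(ts_a: str, ts_b: str, n_batches: int = 100) -> float:
    dt = datetime.strptime(ts_b, FMT) - datetime.strptime(ts_a, FMT)
    return dt.total_seconds() / n_batches

# Two adjacent timestamps from the entries above:
print(seconds_per_batch("2023-07-08 18:43:15,964", "2023-07-08 18:45:31,769"))  # ~1.36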
+[gpub005:0/64] 2023-07-08 18:50:24,992 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub005:0/64] 2023-07-08 18:50:28,681 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"} + preprocess: ) +[gpub005:0/64] 2023-07-08 18:50:28,681 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2, +[gpub005:0/64] 2023-07-08 18:50:28,687 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub005:0/64] 2023-07-08 18:56:41,552 (trainer:732) INFO: 27epoch:train:7501-7600batch: iter_time=1.468, forward_time=0.146, loss_ctc=79.702, loss_att=58.690, acc=0.704, loss=64.993, backward_time=1.043, grad_norm=117.599, clip=100.000, loss_scale=1.209e+24, optim_step_time=0.183, optim0_lr0=6.967e-05, train_time=7.963 +[gpub005:0/64] 2023-07-08 18:58:59,435 (trainer:732) INFO: 27epoch:train:7601-7700batch: iter_time=1.165e-04, forward_time=0.146, loss_ctc=62.698, loss_att=45.702, acc=0.715, loss=50.801, backward_time=1.030, grad_norm=86.375, clip=100.000, loss_scale=1.209e+24, optim_step_time=0.183, optim0_lr0=6.965e-05, train_time=2.758 +[gpub005:0/64] 2023-07-08 19:01:20,171 (trainer:732) INFO: 27epoch:train:7701-7800batch: iter_time=1.161e-04, forward_time=0.145, loss_ctc=68.524, loss_att=51.985, acc=0.704, loss=56.947, backward_time=1.041, grad_norm=108.704, clip=100.000, loss_scale=1.209e+24, optim_step_time=0.183, optim0_lr0=6.964e-05, train_time=2.815 +[gpub005:0/64] 2023-07-08 19:03:36,317 (trainer:732) INFO: 27epoch:train:7801-7900batch: iter_time=1.081e-04, forward_time=0.147, loss_ctc=82.146, loss_att=67.111, acc=0.692, loss=71.621, backward_time=1.030, grad_norm=116.046, clip=100.000, loss_scale=1.209e+24, optim_step_time=0.183, optim0_lr0=6.963e-05, train_time=2.723 +[gpub005:0/64] 2023-07-08 19:05:52,107 (trainer:732) INFO: 27epoch:train:7901-8000batch: iter_time=1.352e-04, forward_time=0.146, loss_ctc=71.994, loss_att=57.649, acc=0.695, loss=61.952, backward_time=1.025, grad_norm=131.857, clip=100.000, loss_scale=1.209e+24, optim_step_time=0.183, optim0_lr0=6.961e-05, train_time=2.716 +[gpub005:0/64] 2023-07-08 19:08:09,009 (trainer:732) INFO: 27epoch:train:8001-8100batch: iter_time=1.230e-04, forward_time=0.147, loss_ctc=61.132, loss_att=49.614, acc=0.689, loss=53.069, backward_time=1.032, grad_norm=103.490, clip=100.000, loss_scale=1.209e+24, optim_step_time=0.183, optim0_lr0=6.960e-05, train_time=2.738 +[gpub005:0/64] 2023-07-08 19:10:25,679 (trainer:732) INFO: 27epoch:train:8101-8200batch: iter_time=1.177e-04, forward_time=0.144, loss_ctc=77.024, loss_att=54.171, acc=0.687, loss=61.027, backward_time=1.027, grad_norm=100.882, clip=100.000, loss_scale=1.209e+24, optim_step_time=0.183, optim0_lr0=6.959e-05, train_time=2.733 +[gpub005:0/64] 2023-07-08 19:12:41,248 (trainer:732) INFO: 27epoch:train:8201-8300batch: iter_time=1.286e-04, forward_time=0.146, loss_ctc=66.224, loss_att=53.289, acc=0.703, loss=57.170, backward_time=1.025, grad_norm=103.207, clip=100.000, loss_scale=1.209e+24, optim_step_time=0.183, 
optim0_lr0=6.957e-05, train_time=2.711 +[gpub005:0/64] 2023-07-08 19:13:32,010 (multiple_iter_factory:32) INFO: Building 10th iter-factory... +[gpub005:0/64] 2023-07-08 19:13:50,361 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub005:0/64] 2023-07-08 19:13:54,092 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"} + preprocess: ) +[gpub005:0/64] 2023-07-08 19:13:54,092 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10, +[gpub005:0/64] 2023-07-08 19:13:54,099 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub005:0/64] 2023-07-08 19:19:15,593 (trainer:732) INFO: 27epoch:train:8301-8400batch: iter_time=1.514, forward_time=0.147, loss_ctc=77.360, loss_att=60.550, acc=0.688, loss=65.593, backward_time=1.044, grad_norm=115.246, clip=100.000, loss_scale=1.209e+24, optim_step_time=0.183, optim0_lr0=6.956e-05, train_time=7.887 +[gpub005:0/64] 2023-07-08 19:21:32,453 (trainer:732) INFO: 27epoch:train:8401-8500batch: iter_time=1.105e-04, forward_time=0.145, loss_ctc=63.582, loss_att=45.791, acc=0.714, loss=51.128, backward_time=1.026, grad_norm=87.132, clip=100.000, loss_scale=1.209e+24, optim_step_time=0.183, optim0_lr0=6.954e-05, train_time=2.737 +[gpub005:0/64] 2023-07-08 19:23:49,755 (trainer:732) INFO: 27epoch:train:8501-8600batch: iter_time=1.181e-04, forward_time=0.145, loss_ctc=68.442, loss_att=51.771, acc=0.700, loss=56.772, backward_time=1.029, grad_norm=94.365, clip=100.000, loss_scale=1.209e+24, optim_step_time=0.183, optim0_lr0=6.953e-05, train_time=2.746 +[gpub005:0/64] 2023-07-08 19:26:06,069 (trainer:732) INFO: 27epoch:train:8601-8700batch: iter_time=1.217e-04, forward_time=0.145, loss_ctc=82.786, loss_att=63.906, acc=0.692, loss=69.570, backward_time=1.027, grad_norm=126.559, clip=100.000, loss_scale=1.209e+24, optim_step_time=0.183, optim0_lr0=6.952e-05, train_time=2.726 +[gpub005:0/64] 2023-07-08 19:28:22,597 (trainer:732) INFO: 27epoch:train:8701-8800batch: iter_time=1.244e-04, forward_time=0.147, loss_ctc=70.784, loss_att=58.190, acc=0.689, loss=61.968, backward_time=1.029, grad_norm=121.368, clip=100.000, loss_scale=1.209e+24, optim_step_time=0.184, optim0_lr0=6.950e-05, train_time=2.730 +[gpub005:0/64] 2023-07-08 19:30:38,374 (trainer:732) INFO: 27epoch:train:8801-8900batch: iter_time=1.201e-04, forward_time=0.147, loss_ctc=60.373, loss_att=48.428, acc=0.694, loss=52.011, backward_time=1.028, grad_norm=99.372, clip=100.000, loss_scale=1.209e+24, optim_step_time=0.184, optim0_lr0=6.949e-05, train_time=2.715 +[gpub005:0/64] 2023-07-08 19:32:53,800 (trainer:732) INFO: 27epoch:train:8901-9000batch: iter_time=1.115e-04, forward_time=0.145, loss_ctc=76.119, loss_att=54.793, acc=0.685, loss=61.191, backward_time=1.026, grad_norm=101.698, clip=100.000, loss_scale=1.209e+24, optim_step_time=0.183, optim0_lr0=6.948e-05, train_time=2.708 +[gpub005:0/64] 2023-07-08 19:35:09,452 (trainer:732) INFO: 27epoch:train:9001-9100batch: iter_time=1.152e-04, forward_time=0.146, loss_ctc=67.817, 
loss_att=54.263, acc=0.701, loss=58.329, backward_time=1.028, grad_norm=89.440, clip=100.000, loss_scale=1.209e+24, optim_step_time=0.183, optim0_lr0=6.946e-05, train_time=2.713 +[gpub005:0/64] 2023-07-08 19:36:41,580 (multiple_iter_factory:32) INFO: Building 11th iter-factory... +[gpub005:0/64] 2023-07-08 19:36:59,716 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub005:0/64] 2023-07-08 19:37:03,195 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"} + preprocess: ) +[gpub005:0/64] 2023-07-08 19:37:03,195 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1, +[gpub005:0/64] 2023-07-08 19:37:03,201 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub005:0/64] 2023-07-08 19:41:40,242 (trainer:732) INFO: 27epoch:train:9101-9200batch: iter_time=1.268, forward_time=0.157, loss_ctc=74.223, loss_att=53.143, acc=0.708, loss=59.467, backward_time=1.037, grad_norm=111.466, clip=100.000, loss_scale=1.209e+24, optim_step_time=0.184, optim0_lr0=6.945e-05, train_time=7.816 +[gpub005:0/64] 2023-07-08 19:43:56,604 (trainer:732) INFO: 27epoch:train:9201-9300batch: iter_time=1.297e-04, forward_time=0.146, loss_ctc=71.313, loss_att=52.868, acc=0.710, loss=58.401, backward_time=1.029, grad_norm=113.150, clip=100.000, loss_scale=1.209e+24, optim_step_time=0.183, optim0_lr0=6.944e-05, train_time=2.727 +[gpub005:0/64] 2023-07-08 19:46:13,420 (trainer:732) INFO: 27epoch:train:9301-9400batch: iter_time=1.309e-04, forward_time=0.145, loss_ctc=64.237, loss_att=50.871, acc=0.717, loss=54.881, backward_time=1.026, grad_norm=98.725, clip=100.000, loss_scale=1.209e+24, optim_step_time=0.183, optim0_lr0=6.942e-05, train_time=2.736 +[gpub005:0/64] 2023-07-08 19:48:30,885 (trainer:732) INFO: 27epoch:train:9401-9500batch: iter_time=1.306e-04, forward_time=0.145, loss_ctc=67.251, loss_att=53.854, acc=0.700, loss=57.873, backward_time=1.028, grad_norm=86.885, clip=100.000, loss_scale=1.209e+24, optim_step_time=0.183, optim0_lr0=6.941e-05, train_time=2.749 +[gpub005:0/64] 2023-07-08 19:50:47,871 (trainer:732) INFO: 27epoch:train:9501-9600batch: iter_time=1.344e-04, forward_time=0.146, loss_ctc=82.998, loss_att=63.271, acc=0.704, loss=69.189, backward_time=1.029, grad_norm=100.681, clip=100.000, loss_scale=1.209e+24, optim_step_time=0.183, optim0_lr0=6.940e-05, train_time=2.740 +[gpub005:0/64] 2023-07-08 19:53:04,040 (trainer:732) INFO: 27epoch:train:9601-9700batch: iter_time=1.091e-04, forward_time=0.147, loss_ctc=69.726, loss_att=58.788, acc=0.700, loss=62.069, backward_time=1.030, grad_norm=100.765, clip=100.000, loss_scale=1.209e+24, optim_step_time=0.183, optim0_lr0=6.938e-05, train_time=2.723 +[gpub005:0/64] 2023-07-08 19:55:19,699 (trainer:732) INFO: 27epoch:train:9701-9800batch: iter_time=1.084e-04, forward_time=0.147, loss_ctc=62.256, loss_att=46.889, acc=0.713, loss=51.499, backward_time=1.028, grad_norm=99.634, clip=100.000, loss_scale=1.209e+24, optim_step_time=0.183, optim0_lr0=6.937e-05, train_time=2.713 +[gpub005:0/64] 2023-07-08 
19:57:35,506 (trainer:732) INFO: 27epoch:train:9801-9900batch: iter_time=1.236e-04, forward_time=0.146, loss_ctc=73.700, loss_att=52.435, acc=0.704, loss=58.814, backward_time=1.028, grad_norm=105.162, clip=100.000, loss_scale=1.209e+24, optim_step_time=0.183, optim0_lr0=6.936e-05, train_time=2.716 +[gpub005:0/64] 2023-07-08 19:59:50,966 (trainer:732) INFO: 27epoch:train:9901-10000batch: iter_time=1.228e-04, forward_time=0.145, loss_ctc=69.244, loss_att=56.460, acc=0.701, loss=60.295, backward_time=1.026, grad_norm=95.619, clip=100.000, loss_scale=1.209e+24, optim_step_time=0.183, optim0_lr0=6.934e-05, train_time=2.709 +[gpub005:0/64] 2023-07-08 20:13:31,090 (trainer:338) INFO: 27epoch results: [train] iter_time=0.175, forward_time=0.148, loss_ctc=71.614, loss_att=55.101, acc=0.698, loss=60.055, backward_time=1.030, grad_norm=105.167, clip=100.000, loss_scale=7.858e+23, optim_step_time=0.183, optim0_lr0=7.002e-05, train_time=3.238, time=4 hours, 30 minutes and 8.18 seconds, total_count=240000, gpu_max_cached_mem_GB=38.234, [valid] loss_ctc=48.741, cer_ctc=0.274, loss_att=40.713, acc=0.655, cer=0.437, wer=1.000, loss=43.121, time=7 minutes and 6.84 seconds, total_count=24794, gpu_max_cached_mem_GB=38.234, [att_plot] time=6 minutes and 18.07 seconds, total_count=0, gpu_max_cached_mem_GB=38.234 +[gpub005:0/64] 2023-07-08 20:13:46,534 (trainer:386) INFO: The best model has been updated: valid.total_count +[gpub005:0/64] 2023-07-08 20:13:46,675 (trainer:272) INFO: 28/30epoch started. Estimated time to finish: 14 hours, 26 minutes and 36.64 seconds +[gpub005:0/64] 2023-07-08 20:13:46,742 (multiple_iter_factory:32) INFO: Building 0th iter-factory... +[gpub005:0/64] 2023-07-08 20:14:06,778 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub005:0/64] 2023-07-08 20:14:10,729 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"} + preprocess: ) +[gpub005:0/64] 2023-07-08 20:14:10,729 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10, +[gpub005:0/64] 2023-07-08 20:14:10,758 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub005:0/64] 2023-07-08 20:20:40,880 (trainer:732) INFO: 28epoch:train:1-100batch: iter_time=2.719, forward_time=0.161, loss_ctc=74.009, loss_att=55.912, acc=0.677, loss=61.341, backward_time=1.048, grad_norm=106.864, clip=100.000, loss_scale=2.418e+24, optim_step_time=0.187, optim0_lr0=6.933e-05, train_time=8.284 +[gpub005:0/64] 2023-07-08 20:22:58,189 (trainer:732) INFO: 28epoch:train:101-200batch: iter_time=1.283e-04, forward_time=0.146, loss_ctc=77.783, loss_att=59.524, acc=0.688, loss=65.002, backward_time=1.028, grad_norm=110.041, clip=100.000, loss_scale=2.418e+24, optim_step_time=0.182, optim0_lr0=6.932e-05, train_time=2.746 +[gpub005:0/64] 2023-07-08 20:25:14,175 (trainer:732) INFO: 28epoch:train:201-300batch: iter_time=1.274e-04, forward_time=0.146, loss_ctc=78.368, loss_att=57.773, acc=0.689, loss=63.952, backward_time=1.027, grad_norm=110.026, clip=100.000, loss_scale=2.418e+24, 
optim_step_time=0.183, optim0_lr0=6.930e-05, train_time=2.720 +[gpub005:0/64] 2023-07-08 20:27:33,894 (trainer:732) INFO: 28epoch:train:301-400batch: iter_time=1.215e-04, forward_time=0.166, loss_ctc=86.210, loss_att=62.958, acc=0.681, loss=69.933, backward_time=1.033, grad_norm=125.836, clip=100.000, loss_scale=2.418e+24, optim_step_time=0.185, optim0_lr0=6.929e-05, train_time=2.794 +[gpub005:0/64] 2023-07-08 20:29:54,389 (trainer:732) INFO: 28epoch:train:401-500batch: iter_time=1.213e-04, forward_time=0.144, loss_ctc=68.699, loss_att=49.263, acc=0.703, loss=55.094, backward_time=1.030, grad_norm=113.824, clip=100.000, loss_scale=2.418e+24, optim_step_time=0.182, optim0_lr0=6.928e-05, train_time=2.810 +[gpub005:0/64] 2023-07-08 20:32:17,226 (trainer:732) INFO: 28epoch:train:501-600batch: iter_time=1.182e-04, forward_time=0.145, loss_ctc=56.148, loss_att=39.661, acc=0.713, loss=44.607, backward_time=1.035, grad_norm=83.985, clip=100.000, loss_scale=2.418e+24, optim_step_time=0.183, optim0_lr0=6.926e-05, train_time=2.857 +[gpub005:0/64] 2023-07-08 20:34:34,235 (trainer:732) INFO: 28epoch:train:601-700batch: iter_time=1.286e-04, forward_time=0.144, loss_ctc=72.009, loss_att=54.099, acc=0.698, loss=59.472, backward_time=1.032, grad_norm=134.899, clip=100.000, loss_scale=2.418e+24, optim_step_time=0.183, optim0_lr0=6.925e-05, train_time=2.740 +[gpub005:0/64] 2023-07-08 20:36:55,642 (trainer:732) INFO: 28epoch:train:701-800batch: iter_time=1.265e-04, forward_time=0.144, loss_ctc=71.921, loss_att=54.806, acc=0.689, loss=59.941, backward_time=1.031, grad_norm=100.285, clip=100.000, loss_scale=2.418e+24, optim_step_time=0.182, optim0_lr0=6.924e-05, train_time=2.828 +[gpub005:0/64] 2023-07-08 20:37:44,836 (multiple_iter_factory:32) INFO: Building 1th iter-factory... 
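The "Building Nth iter-factory..." messages show how each 10000-batch epoch is served: twelve per-split iter-factories (splits12/split.0 through split.11) are built and consumed one after another. A toy illustration of that chaining pattern, with made-up split names and sizes, and not ESPnet's actual MultipleIterFactory implementation:

# Toy illustration of the chaining pattern visible in the log above:
# an epoch iterates one per-split iter-factory after another.
from typing import Callable, Iterable, Iterator, List

def iterate_epoch(factories: List[Callable[[], Iterable]]) -> Iterator:
    for i, build_iter in enumerate(factories):
        print(f"Building {i}th iter-factory...")  # mirrors the log message
        yield from build_iter()

# Three hypothetical splits standing in for splits12/split.0 .. split.11:
splits = [lambda k=k: range(k * 2, k * 2 + 2) for k in range(3)]
for batch in iterate_epoch(splits):
    print("batch", batch)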
+[gpub005:0/64] 2023-07-08 20:38:02,541 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub005:0/64] 2023-07-08 20:38:06,124 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"}
+ preprocess: )
+[gpub005:0/64] 2023-07-08 20:38:06,124 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9,
+[gpub005:0/64] 2023-07-08 20:38:06,130 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub005:0/64] 2023-07-08 20:41:59,143 (trainer:732) INFO: 28epoch:train:801-900batch: iter_time=1.278, forward_time=0.167, loss_ctc=83.852, loss_att=67.054, acc=0.679, loss=72.094, backward_time=1.041, grad_norm=123.747, clip=100.000, loss_scale=2.418e+24, optim_step_time=0.183, optim0_lr0=6.922e-05, train_time=6.070
+[gpub005:0/64] 2023-07-08 20:44:16,329 (trainer:732) INFO: 28epoch:train:901-1000batch: iter_time=1.260e-04, forward_time=0.146, loss_ctc=72.535, loss_att=53.389, acc=0.704, loss=59.133, backward_time=1.032, grad_norm=104.932, clip=100.000, loss_scale=2.418e+24, optim_step_time=0.183, optim0_lr0=6.921e-05, train_time=2.743
+[gpub005:0/64] 2023-07-08 20:46:32,836 (trainer:732) INFO: 28epoch:train:1001-1100batch: iter_time=1.159e-04, forward_time=0.146, loss_ctc=81.503, loss_att=62.255, acc=0.693, loss=68.029, backward_time=1.030, grad_norm=104.731, clip=100.000, loss_scale=2.418e+24, optim_step_time=0.183, optim0_lr0=6.920e-05, train_time=2.730
+[gpub005:0/64] 2023-07-08 20:48:49,066 (trainer:732) INFO: 28epoch:train:1101-1200batch: iter_time=1.186e-04, forward_time=0.145, loss_ctc=82.031, loss_att=62.754, acc=0.695, loss=68.537, backward_time=1.030, grad_norm=126.182, clip=100.000, loss_scale=2.418e+24, optim_step_time=0.183, optim0_lr0=6.918e-05, train_time=2.724
+[gpub005:0/64] 2023-07-08 20:51:04,740 (trainer:732) INFO: 28epoch:train:1201-1300batch: iter_time=1.241e-04, forward_time=0.145, loss_ctc=68.969, loss_att=49.412, acc=0.698, loss=55.279, backward_time=1.027, grad_norm=110.693, clip=100.000, loss_scale=2.418e+24, optim_step_time=0.182, optim0_lr0=6.917e-05, train_time=2.713
+[gpub005:0/64] 2023-07-08 20:53:20,512 (trainer:732) INFO: 28epoch:train:1301-1400batch: iter_time=1.139e-04, forward_time=0.145, loss_ctc=63.955, loss_att=43.837, acc=0.724, loss=49.873, backward_time=1.027, grad_norm=86.116, clip=100.000, loss_scale=2.418e+24, optim_step_time=0.183, optim0_lr0=6.916e-05, train_time=2.715
+[gpub005:0/64] 2023-07-08 20:55:36,243 (trainer:732) INFO: 28epoch:train:1401-1500batch: iter_time=1.207e-04, forward_time=0.145, loss_ctc=62.174, loss_att=44.982, acc=0.721, loss=50.140, backward_time=1.029, grad_norm=99.535, clip=100.000, loss_scale=2.418e+24, optim_step_time=0.182, optim0_lr0=6.914e-05, train_time=2.714
+[gpub005:0/64] 2023-07-08 20:57:52,044 (trainer:732) INFO: 28epoch:train:1501-1600batch: iter_time=1.179e-04, forward_time=0.145, loss_ctc=75.891, loss_att=56.730, acc=0.703, loss=62.479, backward_time=1.029, grad_norm=135.198, clip=100.000, loss_scale=2.418e+24, optim_step_time=0.182, optim0_lr0=6.913e-05, train_time=2.716
+[gpub005:0/64] 2023-07-08 20:59:33,182 (multiple_iter_factory:32) INFO: Building 2th iter-factory...
+[gpub005:0/64] 2023-07-08 20:59:51,725 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub005:0/64] 2023-07-08 20:59:55,498 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"}
+ preprocess: )
+[gpub005:0/64] 2023-07-08 20:59:55,498 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2,
+[gpub005:0/64] 2023-07-08 20:59:55,504 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub005:0/64] 2023-07-08 21:03:46,841 (trainer:732) INFO: 28epoch:train:1601-1700batch: iter_time=1.392, forward_time=0.151, loss_ctc=80.220, loss_att=63.955, acc=0.696, loss=68.834, backward_time=1.039, grad_norm=127.333, clip=100.000, loss_scale=2.418e+24, optim_step_time=0.189, optim0_lr0=6.912e-05, train_time=7.095
+[gpub005:0/64] 2023-07-08 21:06:03,098 (trainer:732) INFO: 28epoch:train:1701-1800batch: iter_time=1.185e-04, forward_time=0.146, loss_ctc=72.712, loss_att=55.287, acc=0.677, loss=60.514, backward_time=1.029, grad_norm=100.144, clip=100.000, loss_scale=2.418e+24, optim_step_time=0.182, optim0_lr0=6.910e-05, train_time=2.726
+[gpub005:0/64] 2023-07-08 21:08:19,000 (trainer:732) INFO: 28epoch:train:1801-1900batch: iter_time=1.228e-04, forward_time=0.144, loss_ctc=76.947, loss_att=57.906, acc=0.692, loss=63.618, backward_time=1.027, grad_norm=115.473, clip=100.000, loss_scale=2.418e+24, optim_step_time=0.183, optim0_lr0=6.909e-05, train_time=2.718
+[gpub005:0/64] 2023-07-08 21:10:35,710 (trainer:732) INFO: 28epoch:train:1901-2000batch: iter_time=1.219e-04, forward_time=0.147, loss_ctc=76.026, loss_att=56.343, acc=0.691, loss=62.248, backward_time=1.031, grad_norm=107.646, clip=100.000, loss_scale=2.418e+24, optim_step_time=0.183, optim0_lr0=6.908e-05, train_time=2.734
+[gpub005:0/64] 2023-07-08 21:12:54,818 (trainer:732) INFO: 28epoch:train:2001-2100batch: iter_time=1.131e-04, forward_time=0.145, loss_ctc=83.541, loss_att=60.894, acc=0.684, loss=67.688, backward_time=1.030, grad_norm=115.416, clip=100.000, loss_scale=2.418e+24, optim_step_time=0.183, optim0_lr0=6.907e-05, train_time=2.782
+[gpub005:0/64] 2023-07-08 21:15:20,099 (trainer:732) INFO: 28epoch:train:2101-2200batch: iter_time=1.214e-04, forward_time=0.145, loss_ctc=67.313, loss_att=48.446, acc=0.713, loss=54.106, backward_time=1.040, grad_norm=89.259, clip=100.000, loss_scale=2.418e+24, optim_step_time=0.183, optim0_lr0=6.905e-05, train_time=2.905
+[gpub005:0/64] 2023-07-08 21:17:35,563 (trainer:732) INFO: 28epoch:train:2201-2300batch: iter_time=1.140e-04, forward_time=0.145, loss_ctc=54.785, loss_att=40.768, acc=0.708, loss=44.973, backward_time=1.026, grad_norm=106.678, clip=100.000, loss_scale=2.418e+24, optim_step_time=0.183, optim0_lr0=6.904e-05, train_time=2.709
+[gpub005:0/64] 2023-07-08 21:19:51,268 (trainer:732) INFO: 28epoch:train:2301-2400batch: iter_time=1.059e-04, forward_time=0.146, loss_ctc=70.891, loss_att=51.557, acc=0.706, loss=57.357, backward_time=1.028, grad_norm=101.405, clip=100.000, loss_scale=2.418e+24, optim_step_time=0.183, optim0_lr0=6.903e-05, train_time=2.714
+[gpub005:0/64] 2023-07-08 21:22:06,949 (trainer:732) INFO: 28epoch:train:2401-2500batch: iter_time=1.033e-04, forward_time=0.146, loss_ctc=72.815, loss_att=54.854, acc=0.693, loss=60.242, backward_time=1.028, grad_norm=90.955, clip=100.000, loss_scale=2.418e+24, optim_step_time=0.183, optim0_lr0=6.901e-05, train_time=2.713
+[gpub005:0/64] 2023-07-08 21:22:16,616 (multiple_iter_factory:32) INFO: Building 3th iter-factory...
+[gpub005:0/64] 2023-07-08 21:22:34,575 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub005:0/64] 2023-07-08 21:22:38,318 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"}
+ preprocess: )
+[gpub005:0/64] 2023-07-08 21:22:38,318 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7,
+[gpub005:0/64] 2023-07-08 21:22:38,324 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub005:0/64] 2023-07-08 21:28:14,931 (trainer:732) INFO: 28epoch:train:2501-2600batch: iter_time=2.263, forward_time=0.151, loss_ctc=73.063, loss_att=56.968, acc=0.690, loss=61.797, backward_time=1.043, grad_norm=109.171, clip=100.000, loss_scale=2.418e+24, optim_step_time=0.183, optim0_lr0=6.900e-05, train_time=7.359
+[gpub005:0/64] 2023-07-08 21:30:31,457 (trainer:732) INFO: 28epoch:train:2601-2700batch: iter_time=1.187e-04, forward_time=0.145, loss_ctc=72.709, loss_att=55.640, acc=0.701, loss=60.761, backward_time=1.030, grad_norm=110.117, clip=100.000, loss_scale=2.418e+24, optim_step_time=0.183, optim0_lr0=6.899e-05, train_time=2.730
+[gpub005:0/64] 2023-07-08 21:32:47,437 (trainer:732) INFO: 28epoch:train:2701-2800batch: iter_time=1.193e-04, forward_time=0.148, loss_ctc=81.634, loss_att=59.340, acc=0.699, loss=66.029, backward_time=1.029, grad_norm=103.358, clip=100.000, loss_scale=2.418e+24, optim_step_time=0.183, optim0_lr0=6.897e-05, train_time=2.719
+[gpub005:0/64] 2023-07-08 21:35:03,491 (trainer:732) INFO: 28epoch:train:2801-2900batch: iter_time=1.088e-04, forward_time=0.147, loss_ctc=80.592, loss_att=60.457, acc=0.699, loss=66.498, backward_time=1.031, grad_norm=117.781, clip=100.000, loss_scale=2.418e+24, optim_step_time=0.183, optim0_lr0=6.896e-05, train_time=2.721
+[gpub005:0/64] 2023-07-08 21:37:19,461 (trainer:732) INFO: 28epoch:train:2901-3000batch: iter_time=1.123e-04, forward_time=0.147, loss_ctc=68.204, loss_att=49.213, acc=0.699, loss=54.910, backward_time=1.029, grad_norm=90.669, clip=100.000, loss_scale=2.418e+24, optim_step_time=0.183, optim0_lr0=6.895e-05, train_time=2.719
+[gpub005:0/64] 2023-07-08 21:39:35,137 (trainer:732) INFO: 28epoch:train:3001-3100batch: iter_time=1.116e-04, forward_time=0.147, loss_ctc=62.732, loss_att=41.881, acc=0.732, loss=48.137, backward_time=1.028, grad_norm=92.578, clip=100.000, loss_scale=2.418e+24, optim_step_time=0.183, optim0_lr0=6.893e-05, train_time=2.713
+[gpub005:0/64] 2023-07-08 21:41:50,956 (trainer:732) INFO: 28epoch:train:3101-3200batch: iter_time=1.205e-04, forward_time=0.147, loss_ctc=63.201, loss_att=47.922, acc=0.716, loss=52.506, backward_time=1.028, grad_norm=82.434, clip=100.000, loss_scale=2.418e+24, optim_step_time=0.183, optim0_lr0=6.892e-05, train_time=2.716
+[gpub005:0/64] 2023-07-08 21:44:06,741 (trainer:732) INFO: 28epoch:train:3201-3300batch: iter_time=1.130e-04, forward_time=0.147, loss_ctc=73.855, loss_att=52.520, acc=0.715, loss=58.920, backward_time=1.028, grad_norm=95.321, clip=100.000, loss_scale=2.418e+24, optim_step_time=0.183, optim0_lr0=6.891e-05, train_time=2.715
+[gpub005:0/64] 2023-07-08 21:44:56,339 (multiple_iter_factory:32) INFO: Building 4th iter-factory...
+[gpub005:0/64] 2023-07-08 21:45:14,546 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub005:0/64] 2023-07-08 21:45:18,307 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"}
+ preprocess: )
+[gpub005:0/64] 2023-07-08 21:45:18,308 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0,
+[gpub005:0/64] 2023-07-08 21:45:18,314 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub005:0/64] 2023-07-08 21:49:50,797 (trainer:732) INFO: 28epoch:train:3301-3400batch: iter_time=1.410, forward_time=0.147, loss_ctc=67.104, loss_att=52.257, acc=0.697, loss=56.711, backward_time=1.043, grad_norm=105.434, clip=100.000, loss_scale=2.418e+24, optim_step_time=0.182, optim0_lr0=6.889e-05, train_time=6.881
+[gpub005:0/64] 2023-07-08 21:52:07,052 (trainer:732) INFO: 28epoch:train:3401-3500batch: iter_time=1.169e-04, forward_time=0.146, loss_ctc=73.493, loss_att=55.952, acc=0.687, loss=61.214, backward_time=1.028, grad_norm=92.809, clip=100.000, loss_scale=2.418e+24, optim_step_time=0.182, optim0_lr0=6.888e-05, train_time=2.725
+[gpub005:0/64] 2023-07-08 21:54:22,857 (trainer:732) INFO: 28epoch:train:3501-3600batch: iter_time=1.204e-04, forward_time=0.146, loss_ctc=81.704, loss_att=61.798, acc=0.682, loss=67.770, backward_time=1.027, grad_norm=108.375, clip=100.000, loss_scale=2.418e+24, optim_step_time=0.183, optim0_lr0=6.887e-05, train_time=2.716
+[gpub005:0/64] 2023-07-08 21:56:38,614 (trainer:732) INFO: 28epoch:train:3601-3700batch: iter_time=1.192e-04, forward_time=0.145, loss_ctc=77.647, loss_att=60.096, acc=0.700, loss=65.361, backward_time=1.026, grad_norm=106.625, clip=100.000, loss_scale=2.418e+24, optim_step_time=0.183, optim0_lr0=6.886e-05, train_time=2.715
+[gpub005:0/64] 2023-07-08 21:58:54,236 (trainer:732) INFO: 28epoch:train:3701-3800batch: iter_time=1.156e-04, forward_time=0.145, loss_ctc=74.147, loss_att=50.797, acc=0.688, loss=57.802, backward_time=1.026, grad_norm=102.912, clip=100.000, loss_scale=2.418e+24, optim_step_time=0.183, optim0_lr0=6.884e-05, train_time=2.712
+[gpub005:0/64] 2023-07-08 22:01:09,914 (trainer:732) INFO: 28epoch:train:3801-3900batch: iter_time=1.187e-04, forward_time=0.144, loss_ctc=65.999, loss_att=48.726, acc=0.715, loss=53.908, backward_time=1.028, grad_norm=94.109, clip=100.000, loss_scale=2.418e+24, optim_step_time=0.182, optim0_lr0=6.883e-05, train_time=2.713
+[gpub005:0/64] 2023-07-08 22:03:25,396 (trainer:732) INFO: 28epoch:train:3901-4000batch: iter_time=1.171e-04, forward_time=0.145, loss_ctc=57.597, loss_att=41.269, acc=0.713, loss=46.168, backward_time=1.026, grad_norm=85.071, clip=100.000, loss_scale=2.418e+24, optim_step_time=0.183, optim0_lr0=6.882e-05, train_time=2.709
+[gpub005:0/64] 2023-07-08 22:05:41,244 (trainer:732) INFO: 28epoch:train:4001-4100batch: iter_time=1.258e-04, forward_time=0.146, loss_ctc=76.578, loss_att=55.734, acc=0.705, loss=61.987, backward_time=1.029, grad_norm=122.779, clip=100.000, loss_scale=4.836e+24, optim_step_time=0.182, optim0_lr0=6.880e-05, train_time=2.717
+[gpub005:0/64] 2023-07-08 22:07:13,468 (multiple_iter_factory:32) INFO: Building 5th iter-factory...
+[gpub005:0/64] 2023-07-08 22:07:31,740 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub005:0/64] 2023-07-08 22:07:35,467 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"}
+ preprocess: )
+[gpub005:0/64] 2023-07-08 22:07:35,467 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6,
+[gpub005:0/64] 2023-07-08 22:07:35,473 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub005:0/64] 2023-07-08 22:11:06,603 (trainer:732) INFO: 28epoch:train:4101-4200batch: iter_time=1.257, forward_time=0.146, loss_ctc=69.702, loss_att=51.949, acc=0.705, loss=57.275, backward_time=1.039, grad_norm=91.533, clip=100.000, loss_scale=4.836e+24, optim_step_time=0.182, optim0_lr0=6.879e-05, train_time=6.507
+[gpub005:0/64] 2023-07-08 22:13:23,067 (trainer:732) INFO: 28epoch:train:4201-4300batch: iter_time=1.210e-04, forward_time=0.146, loss_ctc=72.398, loss_att=55.641, acc=0.682, loss=60.668, backward_time=1.030, grad_norm=93.124, clip=100.000, loss_scale=4.836e+24, optim_step_time=0.183, optim0_lr0=6.878e-05, train_time=2.729
+[gpub005:0/64] 2023-07-08 22:15:38,757 (trainer:732) INFO: 28epoch:train:4301-4400batch: iter_time=1.200e-04, forward_time=0.145, loss_ctc=75.659, loss_att=56.297, acc=0.700, loss=62.106, backward_time=1.027, grad_norm=100.781, clip=100.000, loss_scale=4.836e+24, optim_step_time=0.183, optim0_lr0=6.876e-05, train_time=2.714
+[gpub005:0/64] 2023-07-08 22:17:54,550 (trainer:732) INFO: 28epoch:train:4401-4500batch: iter_time=1.208e-04, forward_time=0.146, loss_ctc=76.934, loss_att=56.013, acc=0.694, loss=62.289, backward_time=1.027, grad_norm=103.762, clip=100.000, loss_scale=4.836e+24, optim_step_time=0.183, optim0_lr0=6.875e-05, train_time=2.716
+[gpub005:0/64] 2023-07-08 22:20:10,415 (trainer:732) INFO: 28epoch:train:4501-4600batch: iter_time=1.173e-04, forward_time=0.146, loss_ctc=82.080, loss_att=60.106, acc=0.689, loss=66.698, backward_time=1.028, grad_norm=118.848, clip=100.000, loss_scale=4.836e+24, optim_step_time=0.183, optim0_lr0=6.874e-05, train_time=2.717
+[gpub005:0/64] 2023-07-08 22:22:26,037 (trainer:732) INFO: 28epoch:train:4601-4700batch: iter_time=1.057e-04, forward_time=0.146, loss_ctc=66.639, loss_att=48.499, acc=0.718, loss=53.941, backward_time=1.027, grad_norm=90.376, clip=100.000, loss_scale=4.836e+24, optim_step_time=0.183, optim0_lr0=6.873e-05, train_time=2.712
+[gpub005:0/64] 2023-07-08 22:24:41,674 (trainer:732) INFO: 28epoch:train:4701-4800batch: iter_time=1.013e-04, forward_time=0.147, loss_ctc=54.555, loss_att=40.462, acc=0.714, loss=44.690, backward_time=1.027, grad_norm=90.199, clip=100.000, loss_scale=4.836e+24, optim_step_time=0.183, optim0_lr0=6.871e-05, train_time=2.713
+[gpub005:0/64] 2023-07-08 22:26:57,140 (trainer:732) INFO: 28epoch:train:4801-4900batch: iter_time=1.256e-04, forward_time=0.144, loss_ctc=69.488, loss_att=50.570, acc=0.708, loss=56.246, backward_time=1.025, grad_norm=110.521, clip=100.000, loss_scale=4.836e+24, optim_step_time=0.183, optim0_lr0=6.870e-05, train_time=2.709
+[gpub005:0/64] 2023-07-08 22:29:26,869 (trainer:732) INFO: 28epoch:train:4901-5000batch: iter_time=1.214e-04, forward_time=0.173, loss_ctc=72.734, loss_att=55.263, acc=0.695, loss=60.504, backward_time=1.071, grad_norm=108.435, clip=100.000, loss_scale=4.836e+24, optim_step_time=0.182, optim0_lr0=6.869e-05, train_time=2.994
+[gpub005:0/64] 2023-07-08 22:29:31,147 (multiple_iter_factory:32) INFO: Building 6th iter-factory...
+[gpub005:0/64] 2023-07-08 22:29:49,288 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub005:0/64] 2023-07-08 22:29:52,760 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"}
+ preprocess: )
+[gpub005:0/64] 2023-07-08 22:29:52,760 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1,
+[gpub005:0/64] 2023-07-08 22:29:52,766 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub005:0/64] 2023-07-08 22:35:12,143 (trainer:732) INFO: 28epoch:train:5001-5100batch: iter_time=1.294, forward_time=0.148, loss_ctc=72.245, loss_att=56.263, acc=0.695, loss=61.058, backward_time=1.050, grad_norm=99.183, clip=100.000, loss_scale=4.836e+24, optim_step_time=0.183, optim0_lr0=6.867e-05, train_time=6.905
+[gpub005:0/64] 2023-07-08 22:37:28,422 (trainer:732) INFO: 28epoch:train:5101-5200batch: iter_time=1.092e-04, forward_time=0.146, loss_ctc=72.485, loss_att=56.428, acc=0.701, loss=61.245, backward_time=1.031, grad_norm=105.729, clip=100.000, loss_scale=4.836e+24, optim_step_time=0.183, optim0_lr0=6.866e-05, train_time=2.725
+[gpub005:0/64] 2023-07-08 22:39:44,453 (trainer:732) INFO: 28epoch:train:5201-5300batch: iter_time=1.167e-04, forward_time=0.146, loss_ctc=82.505, loss_att=60.573, acc=0.698, loss=67.153, backward_time=1.030, grad_norm=117.962, clip=100.000, loss_scale=4.836e+24, optim_step_time=0.183, optim0_lr0=6.865e-05, train_time=2.720
+[gpub005:0/64] 2023-07-08 22:42:00,678 (trainer:732) INFO: 28epoch:train:5301-5400batch: iter_time=1.214e-04, forward_time=0.147, loss_ctc=77.682, loss_att=59.365, acc=0.704, loss=64.860, backward_time=1.031, grad_norm=108.475, clip=100.000, loss_scale=4.836e+24, optim_step_time=0.183, optim0_lr0=6.863e-05, train_time=2.724
+[gpub005:0/64] 2023-07-08 22:44:16,338 (trainer:732) INFO: 28epoch:train:5401-5500batch: iter_time=1.201e-04, forward_time=0.145, loss_ctc=70.316, loss_att=50.612, acc=0.699, loss=56.524, backward_time=1.028, grad_norm=95.879, clip=100.000, loss_scale=4.836e+24, optim_step_time=0.183, optim0_lr0=6.862e-05, train_time=2.713
+[gpub005:0/64] 2023-07-08 22:46:32,135 (trainer:732) INFO: 28epoch:train:5501-5600batch: iter_time=1.223e-04, forward_time=0.146, loss_ctc=59.824, loss_att=40.367, acc=0.741, loss=46.204, backward_time=1.028, grad_norm=86.453, clip=100.000, loss_scale=4.836e+24, optim_step_time=0.183, optim0_lr0=6.861e-05, train_time=2.716
+[gpub005:0/64] 2023-07-08 22:48:48,069 (trainer:732) INFO: 28epoch:train:5601-5700batch: iter_time=1.184e-04, forward_time=0.146, loss_ctc=62.478, loss_att=47.072, acc=0.721, loss=51.693, backward_time=1.028, grad_norm=97.361, clip=100.000, loss_scale=4.836e+24, optim_step_time=0.183, optim0_lr0=6.860e-05, train_time=2.718
+[gpub005:0/64] 2023-07-08 22:51:03,830 (trainer:732) INFO: 28epoch:train:5701-5800batch: iter_time=1.254e-04, forward_time=0.145, loss_ctc=73.749, loss_att=52.299, acc=0.714, loss=58.734, backward_time=1.027, grad_norm=105.786, clip=100.000, loss_scale=4.836e+24, optim_step_time=0.183, optim0_lr0=6.858e-05, train_time=2.715
+[gpub005:0/64] 2023-07-08 22:51:51,144 (multiple_iter_factory:32) INFO: Building 7th iter-factory...
+[gpub005:0/64] 2023-07-08 22:52:09,396 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub005:0/64] 2023-07-08 22:52:12,905 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"}
+ preprocess: )
+[gpub005:0/64] 2023-07-08 22:52:12,905 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11,
+[gpub005:0/64] 2023-07-08 22:52:12,911 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub005:0/64] 2023-07-08 22:58:01,898 (trainer:732) INFO: 28epoch:train:5801-5900batch: iter_time=1.311, forward_time=0.146, loss_ctc=72.764, loss_att=56.765, acc=0.696, loss=61.565, backward_time=1.052, grad_norm=112.026, clip=100.000, loss_scale=4.836e+24, optim_step_time=0.183, optim0_lr0=6.857e-05, train_time=8.361
+[gpub005:0/64] 2023-07-08 23:00:18,123 (trainer:732) INFO: 28epoch:train:5901-6000batch: iter_time=1.145e-04, forward_time=0.146, loss_ctc=71.428, loss_att=53.963, acc=0.702, loss=59.203, backward_time=1.029, grad_norm=114.929, clip=100.000, loss_scale=4.836e+24, optim_step_time=0.183, optim0_lr0=6.856e-05, train_time=2.724
+[gpub005:0/64] 2023-07-08 23:02:33,896 (trainer:732) INFO: 28epoch:train:6001-6100batch: iter_time=1.129e-04, forward_time=0.146, loss_ctc=79.868, loss_att=61.221, acc=0.697, loss=66.815, backward_time=1.028, grad_norm=100.067, clip=100.000, loss_scale=4.836e+24, optim_step_time=0.183, optim0_lr0=6.854e-05, train_time=2.715
+[gpub005:0/64] 2023-07-08 23:04:49,978 (trainer:732) INFO: 28epoch:train:6101-6200batch: iter_time=1.071e-04, forward_time=0.146, loss_ctc=78.402, loss_att=59.583, acc=0.701, loss=65.229, backward_time=1.030, grad_norm=112.131, clip=100.000, loss_scale=4.836e+24, optim_step_time=0.183, optim0_lr0=6.853e-05, train_time=2.721
+[gpub005:0/64] 2023-07-08 23:07:05,436 (trainer:732) INFO: 28epoch:train:6201-6300batch: iter_time=1.087e-04, forward_time=0.144, loss_ctc=67.248, loss_att=47.858, acc=0.704, loss=53.675, backward_time=1.026, grad_norm=110.296, clip=100.000, loss_scale=4.836e+24, optim_step_time=0.183, optim0_lr0=6.852e-05, train_time=2.709
+[gpub005:0/64] 2023-07-08 23:09:20,706 (trainer:732) INFO: 28epoch:train:6301-6400batch: iter_time=1.095e-04, forward_time=0.145, loss_ctc=63.660, loss_att=43.054, acc=0.732, loss=49.236, backward_time=1.026, grad_norm=83.660, clip=100.000, loss_scale=4.836e+24, optim_step_time=0.183, optim0_lr0=6.851e-05, train_time=2.705
+[gpub005:0/64] 2023-07-08 23:11:36,441 (trainer:732) INFO: 28epoch:train:6401-6500batch: iter_time=1.108e-04, forward_time=0.146, loss_ctc=60.251, loss_att=43.848, acc=0.724, loss=48.769, backward_time=1.028, grad_norm=91.709, clip=100.000, loss_scale=4.836e+24, optim_step_time=0.183, optim0_lr0=6.849e-05, train_time=2.714
+[gpub005:0/64] 2023-07-08 23:13:52,452 (trainer:732) INFO: 28epoch:train:6501-6600batch: iter_time=1.138e-04, forward_time=0.147, loss_ctc=74.912, loss_att=55.471, acc=0.707, loss=61.303, backward_time=1.030, grad_norm=89.579, clip=100.000, loss_scale=4.836e+24, optim_step_time=0.183, optim0_lr0=6.848e-05, train_time=2.720
+[gpub005:0/64] 2023-07-08 23:15:26,907 (multiple_iter_factory:32) INFO: Building 8th iter-factory...
+[gpub005:0/64] 2023-07-08 23:15:45,143 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub005:0/64] 2023-07-08 23:15:48,597 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"}
+ preprocess: )
+[gpub005:0/64] 2023-07-08 23:15:48,597 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3,
+[gpub005:0/64] 2023-07-08 23:15:48,603 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub005:0/64] 2023-07-08 23:20:55,660 (trainer:732) INFO: 28epoch:train:6601-6700batch: iter_time=1.286, forward_time=0.153, loss_ctc=79.725, loss_att=63.645, acc=0.703, loss=68.469, backward_time=1.041, grad_norm=117.042, clip=100.000, loss_scale=4.836e+24, optim_step_time=0.185, optim0_lr0=6.847e-05, train_time=8.464
+[gpub005:0/64] 2023-07-08 23:23:21,217 (trainer:732) INFO: 28epoch:train:6701-6800batch: iter_time=1.228e-04, forward_time=0.164, loss_ctc=72.691, loss_att=55.014, acc=0.698, loss=60.317, backward_time=1.046, grad_norm=109.801, clip=100.000, loss_scale=4.836e+24, optim_step_time=0.183, optim0_lr0=6.845e-05, train_time=2.911
+[gpub005:0/64] 2023-07-08 23:25:38,727 (trainer:732) INFO: 28epoch:train:6801-6900batch: iter_time=1.085e-04, forward_time=0.145, loss_ctc=75.359, loss_att=57.243, acc=0.704, loss=62.678, backward_time=1.029, grad_norm=107.335, clip=100.000, loss_scale=4.836e+24, optim_step_time=0.183, optim0_lr0=6.844e-05, train_time=2.750
+[gpub005:0/64] 2023-07-08 23:27:54,797 (trainer:732) INFO: 28epoch:train:6901-7000batch: iter_time=1.157e-04, forward_time=0.146, loss_ctc=74.894, loss_att=54.136, acc=0.713, loss=60.363, backward_time=1.029, grad_norm=108.816, clip=100.000, loss_scale=4.836e+24, optim_step_time=0.183, optim0_lr0=6.843e-05, train_time=2.721
+[gpub005:0/64] 2023-07-08 23:30:11,239 (trainer:732) INFO: 28epoch:train:7001-7100batch: iter_time=1.201e-04, forward_time=0.145, loss_ctc=81.357, loss_att=58.035, acc=0.695, loss=65.032, backward_time=1.032, grad_norm=149.789, clip=100.000, loss_scale=4.836e+24, optim_step_time=0.183, optim0_lr0=6.842e-05, train_time=2.729
+[gpub005:0/64] 2023-07-08 23:32:40,425 (trainer:732) INFO: 28epoch:train:7101-7200batch: iter_time=1.226e-04, forward_time=0.145, loss_ctc=66.158, loss_att=46.689, acc=0.720, loss=52.530, backward_time=1.049, grad_norm=111.917, clip=100.000, loss_scale=4.836e+24, optim_step_time=0.183, optim0_lr0=6.840e-05, train_time=2.983
+[gpub005:0/64] 2023-07-08 23:34:58,213 (trainer:732) INFO: 28epoch:train:7201-7300batch: iter_time=1.228e-04, forward_time=0.145, loss_ctc=54.101, loss_att=39.666, acc=0.729, loss=43.996, backward_time=1.033, grad_norm=92.941, clip=100.000, loss_scale=4.836e+24, optim_step_time=0.183, optim0_lr0=6.839e-05, train_time=2.756
+[gpub005:0/64] 2023-07-08 23:37:14,151 (trainer:732) INFO: 28epoch:train:7301-7400batch: iter_time=1.118e-04, forward_time=0.146, loss_ctc=68.799, loss_att=50.354, acc=0.721, loss=55.888, backward_time=1.029, grad_norm=109.989, clip=100.000, loss_scale=4.836e+24, optim_step_time=0.183, optim0_lr0=6.838e-05, train_time=2.719
+[gpub005:0/64] 2023-07-08 23:39:31,461 (trainer:732) INFO: 28epoch:train:7401-7500batch: iter_time=1.039e-04, forward_time=0.145, loss_ctc=72.077, loss_att=54.561, acc=0.706, loss=59.816, backward_time=1.030, grad_norm=91.086, clip=100.000, loss_scale=4.836e+24, optim_step_time=0.183, optim0_lr0=6.836e-05, train_time=2.746
+[gpub005:0/64] 2023-07-08 23:39:34,674 (multiple_iter_factory:32) INFO: Building 9th iter-factory...
+[gpub005:0/64] 2023-07-08 23:39:53,142 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub005:0/64] 2023-07-08 23:39:56,612 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"}
+ preprocess: )
+[gpub005:0/64] 2023-07-08 23:39:56,612 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4,
+[gpub005:0/64] 2023-07-08 23:39:56,618 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub005:0/64] 2023-07-08 23:45:44,315 (trainer:732) INFO: 28epoch:train:7501-7600batch: iter_time=1.328, forward_time=0.147, loss_ctc=71.380, loss_att=56.723, acc=0.686, loss=61.120, backward_time=1.041, grad_norm=103.229, clip=100.000, loss_scale=4.836e+24, optim_step_time=0.183, optim0_lr0=6.835e-05, train_time=7.457
+[gpub005:0/64] 2023-07-08 23:48:00,073 (trainer:732) INFO: 28epoch:train:7601-7700batch: iter_time=1.118e-04, forward_time=0.144, loss_ctc=72.398, loss_att=57.071, acc=0.691, loss=61.669, backward_time=1.027, grad_norm=89.683, clip=100.000, loss_scale=4.836e+24, optim_step_time=0.183, optim0_lr0=6.834e-05, train_time=2.715
+[gpub005:0/64] 2023-07-08 23:50:16,768 (trainer:732) INFO: 28epoch:train:7701-7800batch: iter_time=1.307e-04, forward_time=0.146, loss_ctc=78.542, loss_att=58.230, acc=0.695, loss=64.323, backward_time=1.027, grad_norm=116.271, clip=100.000, loss_scale=4.836e+24, optim_step_time=0.183, optim0_lr0=6.833e-05, train_time=2.734
+[gpub005:0/64] 2023-07-08 23:52:32,396 (trainer:732) INFO: 28epoch:train:7801-7900batch: iter_time=1.381e-04, forward_time=0.144, loss_ctc=78.828, loss_att=58.826, acc=0.698, loss=64.827, backward_time=1.027, grad_norm=109.064, clip=100.000, loss_scale=4.836e+24, optim_step_time=0.183, optim0_lr0=6.831e-05, train_time=2.712
+[gpub005:0/64] 2023-07-08 23:54:48,243 (trainer:732) INFO: 28epoch:train:7901-8000batch: iter_time=1.203e-04, forward_time=0.145, loss_ctc=69.930, loss_att=50.336, acc=0.706, loss=56.214, backward_time=1.027, grad_norm=96.055, clip=100.000, loss_scale=4.836e+24, optim_step_time=0.183, optim0_lr0=6.830e-05, train_time=2.717
+[gpub005:0/64] 2023-07-08 23:57:03,582 (trainer:732) INFO: 28epoch:train:8001-8100batch: iter_time=1.285e-04, forward_time=0.145, loss_ctc=59.761, loss_att=41.062, acc=0.728, loss=46.672, backward_time=1.026, grad_norm=94.381, clip=100.000, loss_scale=9.671e+24, optim_step_time=0.183, optim0_lr0=6.829e-05, train_time=2.707
+[gpub005:0/64] 2023-07-08 23:59:19,301 (trainer:732) INFO: 28epoch:train:8101-8200batch: iter_time=1.383e-04, forward_time=0.145, loss_ctc=63.711, loss_att=48.157, acc=0.707, loss=52.823, backward_time=1.029, grad_norm=95.737, clip=100.000, loss_scale=9.671e+24, optim_step_time=0.183, optim0_lr0=6.828e-05, train_time=2.714
+[gpub005:0/64] 2023-07-09 00:01:34,627 (trainer:732) INFO: 28epoch:train:8201-8300batch: iter_time=1.262e-04, forward_time=0.144, loss_ctc=74.016, loss_att=52.648, acc=0.706, loss=59.058, backward_time=1.025, grad_norm=98.099, clip=100.000, loss_scale=9.671e+24, optim_step_time=0.183, optim0_lr0=6.826e-05, train_time=2.706
+[gpub005:0/64] 2023-07-09 00:02:20,574 (multiple_iter_factory:32) INFO: Building 10th iter-factory...
+[gpub005:0/64] 2023-07-09 00:02:39,116 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub005:0/64] 2023-07-09 00:02:42,968 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"}
+ preprocess: )
+[gpub005:0/64] 2023-07-09 00:02:42,969 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5,
+[gpub005:0/64] 2023-07-09 00:02:42,975 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub005:0/64] 2023-07-09 00:07:17,507 (trainer:732) INFO: 28epoch:train:8301-8400batch: iter_time=1.279, forward_time=0.147, loss_ctc=67.473, loss_att=52.063, acc=0.704, loss=56.686, backward_time=1.045, grad_norm=110.832, clip=100.000, loss_scale=9.671e+24, optim_step_time=0.183, optim0_lr0=6.825e-05, train_time=6.857
+[gpub005:0/64] 2023-07-09 00:09:34,819 (trainer:732) INFO: 28epoch:train:8401-8500batch: iter_time=1.026e-04, forward_time=0.143, loss_ctc=73.300, loss_att=54.385, acc=0.702, loss=60.059, backward_time=1.027, grad_norm=113.571, clip=100.000, loss_scale=9.671e+24, optim_step_time=0.183, optim0_lr0=6.824e-05, train_time=2.746
+[gpub005:0/64] 2023-07-09 00:11:51,672 (trainer:732) INFO: 28epoch:train:8501-8600batch: iter_time=1.048e-04, forward_time=0.146, loss_ctc=79.277, loss_att=60.008, acc=0.702, loss=65.788, backward_time=1.029, grad_norm=134.268, clip=100.000, loss_scale=9.671e+24, optim_step_time=0.183, optim0_lr0=6.822e-05, train_time=2.737
+[gpub005:0/64] 2023-07-09 00:14:08,387 (trainer:732) INFO: 28epoch:train:8601-8700batch: iter_time=1.041e-04, forward_time=0.144, loss_ctc=79.016, loss_att=61.352, acc=0.710, loss=66.652, backward_time=1.029, grad_norm=114.222, clip=100.000, loss_scale=9.671e+24, optim_step_time=0.183, optim0_lr0=6.821e-05, train_time=2.734
+[gpub005:0/64] 2023-07-09 00:16:24,162 (trainer:732) INFO: 28epoch:train:8701-8800batch: iter_time=1.025e-04, forward_time=0.145, loss_ctc=71.708, loss_att=50.081, acc=0.692, loss=56.569, backward_time=1.027, grad_norm=119.421, clip=100.000, loss_scale=9.671e+24, optim_step_time=0.183, optim0_lr0=6.820e-05, train_time=2.715
+[gpub005:0/64] 2023-07-09 00:18:39,832 (trainer:732) INFO: 28epoch:train:8801-8900batch: iter_time=1.068e-04, forward_time=0.145, loss_ctc=63.647, loss_att=45.869, acc=0.730, loss=51.202, backward_time=1.027, grad_norm=81.853, clip=100.000, loss_scale=9.671e+24, optim_step_time=0.183, optim0_lr0=6.819e-05, train_time=2.713
+[gpub005:0/64] 2023-07-09 00:21:09,960 (trainer:732) INFO: 28epoch:train:8901-9000batch: iter_time=1.066e-04, forward_time=0.145, loss_ctc=55.394, loss_att=39.466, acc=0.733, loss=44.244, backward_time=1.066, grad_norm=83.283, clip=100.000, loss_scale=9.671e+24, optim_step_time=0.183, optim0_lr0=6.817e-05, train_time=3.002
+[gpub005:0/64] 2023-07-09 00:23:27,121 (trainer:732) INFO: 28epoch:train:9001-9100batch: iter_time=1.032e-04, forward_time=0.146, loss_ctc=77.192, loss_att=57.711, acc=0.712, loss=63.555, backward_time=1.030, grad_norm=122.747, clip=100.000, loss_scale=9.671e+24, optim_step_time=0.183, optim0_lr0=6.816e-05, train_time=2.743
+[gpub005:0/64] 2023-07-09 00:24:58,099 (multiple_iter_factory:32) INFO: Building 11th iter-factory...
+[gpub005:0/64] 2023-07-09 00:25:16,535 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub005:0/64] 2023-07-09 00:25:19,994 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"}
+ preprocess: )
+[gpub005:0/64] 2023-07-09 00:25:19,994 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8,
+[gpub005:0/64] 2023-07-09 00:25:20,000 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub005:0/64] 2023-07-09 00:30:05,437 (trainer:732) INFO: 28epoch:train:9101-9200batch: iter_time=1.262, forward_time=0.153, loss_ctc=66.052, loss_att=48.621, acc=0.718, loss=53.850, backward_time=1.040, grad_norm=88.945, clip=100.000, loss_scale=9.671e+24, optim_step_time=0.183, optim0_lr0=6.815e-05, train_time=7.966
+[gpub005:0/64] 2023-07-09 00:32:27,808 (trainer:732) INFO: 28epoch:train:9201-9300batch: iter_time=1.159e-04, forward_time=0.145, loss_ctc=71.873, loss_att=53.639, acc=0.689, loss=59.110, backward_time=1.041, grad_norm=96.280, clip=100.000, loss_scale=9.671e+24, optim_step_time=0.183, optim0_lr0=6.814e-05, train_time=2.847
+[gpub005:0/64] 2023-07-09 00:34:45,342 (trainer:732) INFO: 28epoch:train:9301-9400batch: iter_time=1.180e-04, forward_time=0.145, loss_ctc=80.187, loss_att=62.167, acc=0.681, loss=67.573, backward_time=1.028, grad_norm=122.430, clip=100.000, loss_scale=9.671e+24, optim_step_time=0.183, optim0_lr0=6.812e-05, train_time=2.750
+[gpub005:0/64] 2023-07-09 00:37:02,014 (trainer:732) INFO: 28epoch:train:9401-9500batch: iter_time=1.190e-04, forward_time=0.146, loss_ctc=78.718, loss_att=59.859, acc=0.703, loss=65.517, backward_time=1.029, grad_norm=119.060, clip=100.000, loss_scale=9.671e+24, optim_step_time=0.183, optim0_lr0=6.811e-05, train_time=2.733
+[gpub005:0/64] 2023-07-09 00:39:18,308 (trainer:732) INFO: 28epoch:train:9501-9600batch: iter_time=1.152e-04, forward_time=0.146, loss_ctc=72.327, loss_att=48.677, acc=0.700, loss=55.772, backward_time=1.028, grad_norm=106.106, clip=100.000, loss_scale=9.671e+24, optim_step_time=0.183, optim0_lr0=6.810e-05, train_time=2.726
+[gpub005:0/64] 2023-07-09 00:41:34,013 (trainer:732) INFO: 28epoch:train:9601-9700batch: iter_time=1.140e-04, forward_time=0.144, loss_ctc=64.789, loss_att=49.014, acc=0.714, loss=53.746, backward_time=1.026, grad_norm=113.313, clip=100.000, loss_scale=9.671e+24, optim_step_time=0.182, optim0_lr0=6.809e-05, train_time=2.714
+[gpub005:0/64] 2023-07-09 00:43:49,399 (trainer:732) INFO: 28epoch:train:9701-9800batch: iter_time=1.158e-04, forward_time=0.145, loss_ctc=57.198, loss_att=41.033, acc=0.717, loss=45.882, backward_time=1.025, grad_norm=93.064, clip=100.000, loss_scale=9.671e+24, optim_step_time=0.183, optim0_lr0=6.807e-05, train_time=2.707
+[gpub005:0/64] 2023-07-09 00:46:05,002 (trainer:732) INFO: 28epoch:train:9801-9900batch: iter_time=1.168e-04, forward_time=0.144, loss_ctc=73.809, loss_att=54.241, acc=0.702, loss=60.111, backward_time=1.027, grad_norm=105.246, clip=100.000, loss_scale=9.671e+24, optim_step_time=0.183, optim0_lr0=6.806e-05, train_time=2.712
+[gpub005:0/64] 2023-07-09 00:48:20,905 (trainer:732) INFO: 28epoch:train:9901-10000batch: iter_time=1.288e-04, forward_time=0.145, loss_ctc=79.070, loss_att=61.377, acc=0.697, loss=66.685, backward_time=1.029, grad_norm=105.646, clip=100.000, loss_scale=9.671e+24, optim_step_time=0.183, optim0_lr0=6.805e-05, train_time=2.718
+[gpub005:0/64] 2023-07-09 01:00:59,859 (trainer:338) INFO: 28epoch results: [train] iter_time=0.181, forward_time=0.147, loss_ctc=71.863, loss_att=53.365, acc=0.703, loss=58.915, backward_time=1.032, grad_norm=105.211, clip=100.000, loss_scale=4.836e+24, optim_step_time=0.183, optim0_lr0=6.868e-05, train_time=3.295, time=4 hours, 34 minutes and 54.57 seconds, total_count=250000, gpu_max_cached_mem_GB=38.234, [valid] loss_ctc=47.311, cer_ctc=0.272, loss_att=40.448, acc=0.654, cer=0.441, wer=1.000, loss=42.507, time=6 minutes and 28.69 seconds, total_count=25806, gpu_max_cached_mem_GB=38.234, [att_plot] time=5 minutes and 49.92 seconds, total_count=0, gpu_max_cached_mem_GB=38.234
+[gpub005:0/64] 2023-07-09 01:01:17,688 (trainer:386) INFO: The best model has been updated: valid.total_count
+[gpub005:0/64] 2023-07-09 01:01:17,806 (trainer:272) INFO: 29/30epoch started. Estimated time to finish: 9 hours, 37 minutes and 17.41 seconds
+[gpub005:0/64] 2023-07-09 01:01:18,821 (multiple_iter_factory:32) INFO: Building 0th iter-factory...
+[gpub005:0/64] 2023-07-09 01:01:37,314 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub005:0/64] 2023-07-09 01:01:42,700 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"}
+ preprocess: )
+[gpub005:0/64] 2023-07-09 01:01:42,700 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7,
+[gpub005:0/64] 2023-07-09 01:01:42,754 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub005:0/64] 2023-07-09 01:08:33,856 (trainer:732) INFO: 29epoch:train:1-100batch: iter_time=2.923, forward_time=0.172, loss_ctc=75.895, loss_att=61.514, acc=0.699, loss=65.828, backward_time=1.046, grad_norm=122.053, clip=100.000, loss_scale=9.671e+24, optim_step_time=0.192, optim0_lr0=6.803e-05, train_time=8.710
+[gpub005:0/64] 2023-07-09 01:10:50,462 (trainer:732) INFO: 29epoch:train:101-200batch: iter_time=1.038e-04, forward_time=0.144, loss_ctc=68.461, loss_att=53.461, acc=0.697, loss=57.961, backward_time=1.028, grad_norm=104.908, clip=100.000, loss_scale=9.671e+24, optim_step_time=0.183, optim0_lr0=6.802e-05, train_time=2.732
+[gpub005:0/64] 2023-07-09 01:13:08,278 (trainer:732) INFO: 29epoch:train:201-300batch: iter_time=1.035e-04, forward_time=0.145, loss_ctc=90.315, loss_att=55.873, acc=0.705, loss=66.206, backward_time=1.031, grad_norm=113.891, clip=100.000, loss_scale=9.671e+24, optim_step_time=0.183, optim0_lr0=6.801e-05, train_time=2.756
+[gpub005:0/64] 2023-07-09 01:15:33,011 (trainer:732) INFO: 29epoch:train:301-400batch: iter_time=1.055e-04, forward_time=0.145, loss_ctc=73.221, loss_att=50.232, acc=0.710, loss=57.129, backward_time=1.036, grad_norm=122.026, clip=100.000, loss_scale=9.671e+24, optim_step_time=0.183, optim0_lr0=6.800e-05, train_time=2.894
+[gpub005:0/64] 2023-07-09 01:17:54,852 (trainer:732) INFO: 29epoch:train:401-500batch: iter_time=9.923e-05, forward_time=0.145, loss_ctc=73.934, loss_att=56.527, acc=0.698, loss=61.749, backward_time=1.033, grad_norm=117.019, clip=100.000, loss_scale=9.671e+24, optim_step_time=0.183, optim0_lr0=6.798e-05, train_time=2.837
+[gpub005:0/64] 2023-07-09 01:20:26,236 (trainer:732) INFO: 29epoch:train:501-600batch: iter_time=9.637e-05, forward_time=0.145, loss_ctc=70.822, loss_att=54.513, acc=0.707, loss=59.406, backward_time=1.075, grad_norm=98.241, clip=100.000, loss_scale=9.671e+24, optim_step_time=0.183, optim0_lr0=6.797e-05, train_time=3.027
+[gpub005:0/64] 2023-07-09 01:22:58,108 (trainer:732) INFO: 29epoch:train:601-700batch: iter_time=9.952e-05, forward_time=0.145, loss_ctc=70.652, loss_att=57.641, acc=0.680, loss=61.544, backward_time=1.053, grad_norm=96.985, clip=100.000, loss_scale=9.671e+24, optim_step_time=0.183, optim0_lr0=6.796e-05, train_time=3.037
+[gpub005:0/64] 2023-07-09 01:25:17,769 (trainer:732) INFO: 29epoch:train:701-800batch: iter_time=1.138e-04, forward_time=0.144, loss_ctc=60.087, loss_att=42.384, acc=0.718, loss=47.695, backward_time=1.040, grad_norm=90.361, clip=100.000, loss_scale=9.671e+24, optim_step_time=0.183, optim0_lr0=6.795e-05, train_time=2.793
+[gpub005:0/64] 2023-07-09 01:26:10,853 (multiple_iter_factory:32) INFO: Building 1th iter-factory...
+[gpub005:0/64] 2023-07-09 01:26:28,595 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub005:0/64] 2023-07-09 01:26:32,230 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"}
+ preprocess: )
+[gpub005:0/64] 2023-07-09 01:26:32,230 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1,
+[gpub005:0/64] 2023-07-09 01:26:32,236 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub005:0/64] 2023-07-09 01:31:54,364 (trainer:732) INFO: 29epoch:train:801-900batch: iter_time=1.384, forward_time=0.149, loss_ctc=78.227, loss_att=62.233, acc=0.694, loss=67.031, backward_time=1.042, grad_norm=108.631, clip=100.000, loss_scale=9.671e+24, optim_step_time=0.183, optim0_lr0=6.793e-05, train_time=7.932
+[gpub005:0/64] 2023-07-09 01:34:11,688 (trainer:732) INFO: 29epoch:train:901-1000batch: iter_time=1.125e-04, forward_time=0.144, loss_ctc=63.851, loss_att=49.602, acc=0.700, loss=53.877, backward_time=1.029, grad_norm=110.254, clip=100.000, loss_scale=9.671e+24, optim_step_time=0.183, optim0_lr0=6.792e-05, train_time=2.746
+[gpub005:0/64] 2023-07-09 01:36:27,600 (trainer:732) INFO: 29epoch:train:1001-1100batch: iter_time=1.100e-04, forward_time=0.145, loss_ctc=78.049, loss_att=56.277, acc=0.710, loss=62.809, backward_time=1.029, grad_norm=103.467, clip=100.000, loss_scale=9.671e+24, optim_step_time=0.183, optim0_lr0=6.791e-05, train_time=2.718
+[gpub005:0/64] 2023-07-09 01:38:43,893 (trainer:732) INFO: 29epoch:train:1101-1200batch: iter_time=1.122e-04, forward_time=0.145, loss_ctc=80.017, loss_att=48.167, acc=0.710, loss=57.722, backward_time=1.029, grad_norm=140.589, clip=100.000, loss_scale=9.671e+24, optim_step_time=0.183, optim0_lr0=6.790e-05, train_time=2.726
+[gpub005:0/64] 2023-07-09 01:41:00,282 (trainer:732) INFO: 29epoch:train:1201-1300batch: iter_time=1.151e-04, forward_time=0.146, loss_ctc=76.299, loss_att=60.020, acc=0.697, loss=64.904, backward_time=1.032, grad_norm=116.404, clip=100.000, loss_scale=9.671e+24, optim_step_time=0.183, optim0_lr0=6.788e-05, train_time=2.728
+[gpub005:0/64] 2023-07-09 01:43:16,385 (trainer:732) INFO: 29epoch:train:1301-1400batch: iter_time=1.132e-04, forward_time=0.146, loss_ctc=73.892, loss_att=56.992, acc=0.705, loss=62.062, backward_time=1.031, grad_norm=120.717, clip=100.000, loss_scale=9.671e+24, optim_step_time=0.183, optim0_lr0=6.787e-05, train_time=2.722
+[gpub005:0/64] 2023-07-09 01:45:32,444 (trainer:732) INFO: 29epoch:train:1401-1500batch: iter_time=1.029e-04, forward_time=0.146, loss_ctc=70.577, loss_att=55.685, acc=0.690, loss=60.153, backward_time=1.030, grad_norm=103.511, clip=100.000, loss_scale=9.671e+24, optim_step_time=0.183, optim0_lr0=6.786e-05, train_time=2.721
+[gpub005:0/64] 2023-07-09 01:47:47,996 (trainer:732) INFO: 29epoch:train:1501-1600batch: iter_time=1.223e-04, forward_time=0.146, loss_ctc=63.930, loss_att=50.799, acc=0.697, loss=54.738, backward_time=1.026, grad_norm=103.992, clip=100.000, loss_scale=9.671e+24, optim_step_time=0.183, optim0_lr0=6.785e-05, train_time=2.711
+[gpub005:0/64] 2023-07-09 01:49:20,766 (multiple_iter_factory:32) INFO: Building 2th iter-factory...
+[gpub005:0/64] 2023-07-09 01:49:38,971 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub005:0/64] 2023-07-09 01:49:42,688 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"}
+ preprocess: )
+[gpub005:0/64] 2023-07-09 01:49:42,688 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9,
+[gpub005:0/64] 2023-07-09 01:49:42,694 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub005:0/64] 2023-07-09 01:54:36,572 (trainer:732) INFO: 29epoch:train:1601-1700batch: iter_time=1.342, forward_time=0.168, loss_ctc=63.970, loss_att=46.468, acc=0.710, loss=51.719, backward_time=1.035, grad_norm=113.327, clip=100.000, loss_scale=9.671e+24, optim_step_time=0.184, optim0_lr0=6.783e-05, train_time=8.171
+[gpub005:0/64] 2023-07-09 01:56:56,586 (trainer:732) INFO: 29epoch:train:1701-1800batch: iter_time=1.246e-04, forward_time=0.146, loss_ctc=64.314, loss_att=50.068, acc=0.699, loss=54.342, backward_time=1.035, grad_norm=153.292, clip=100.000, loss_scale=9.671e+24, optim_step_time=0.183, optim0_lr0=6.782e-05, train_time=2.800
+[gpub005:0/64] 2023-07-09 01:59:12,539 (trainer:732) INFO: 29epoch:train:1801-1900batch: iter_time=1.046e-04, forward_time=0.147, loss_ctc=74.996, loss_att=54.133, acc=0.713, loss=60.392, backward_time=1.029, grad_norm=101.452, clip=100.000, loss_scale=9.671e+24, optim_step_time=0.183, optim0_lr0=6.781e-05, train_time=2.719
+[gpub005:0/64] 2023-07-09 02:01:28,732 (trainer:732) INFO: 29epoch:train:1901-2000batch: iter_time=1.087e-04, forward_time=0.147, loss_ctc=79.906, loss_att=47.196, acc=0.715, loss=57.009, backward_time=1.030, grad_norm=115.619, clip=100.000, loss_scale=9.671e+24, optim_step_time=0.183, optim0_lr0=6.780e-05, train_time=2.724
+[gpub005:0/64] 2023-07-09 02:03:45,806 (trainer:732) INFO: 29epoch:train:2001-2100batch: iter_time=1.086e-04, forward_time=0.148, loss_ctc=73.845, loss_att=58.312, acc=0.700, loss=62.972, backward_time=1.032, grad_norm=112.680, clip=100.000, loss_scale=1.934e+25, optim_step_time=0.183, optim0_lr0=6.778e-05, train_time=2.741
+[gpub005:0/64] 2023-07-09 02:06:01,896 (trainer:732) INFO: 29epoch:train:2101-2200batch: iter_time=1.073e-04, forward_time=0.147, loss_ctc=72.628, loss_att=54.739, acc=0.701, loss=60.106, backward_time=1.030, grad_norm=99.890, clip=100.000, loss_scale=1.934e+25, optim_step_time=0.183, optim0_lr0=6.777e-05, train_time=2.722
+[gpub005:0/64] 2023-07-09 02:08:17,780 (trainer:732) INFO: 29epoch:train:2201-2300batch: iter_time=1.062e-04, forward_time=0.146, loss_ctc=69.153, loss_att=52.877, acc=0.697, loss=57.760, backward_time=1.029, grad_norm=124.253, clip=100.000, loss_scale=1.934e+25, optim_step_time=0.183, optim0_lr0=6.776e-05, train_time=2.717
+[gpub005:0/64] 2023-07-09 02:10:33,671 (trainer:732) INFO: 29epoch:train:2301-2400batch: iter_time=1.090e-04, forward_time=0.146, loss_ctc=64.791, loss_att=53.066, acc=0.698, loss=56.583, backward_time=1.029, grad_norm=124.055, clip=100.000, loss_scale=1.934e+25, optim_step_time=0.183, optim0_lr0=6.775e-05, train_time=2.718
+[gpub005:0/64] 2023-07-09 02:12:49,411 (trainer:732) INFO: 29epoch:train:2401-2500batch: iter_time=1.146e-04, forward_time=0.145, loss_ctc=67.622, loss_att=55.140, acc=0.702, loss=58.884, backward_time=1.028, grad_norm=88.955, clip=100.000, loss_scale=1.934e+25, optim_step_time=0.182, optim0_lr0=6.773e-05, train_time=2.715
+[gpub005:0/64] 2023-07-09 02:12:54,375 (multiple_iter_factory:32) INFO: Building 3th iter-factory...
+[gpub005:0/64] 2023-07-09 02:13:12,234 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub005:0/64] 2023-07-09 02:13:15,958 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"}
+ preprocess: )
+[gpub005:0/64] 2023-07-09 02:13:15,958 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11,
+[gpub005:0/64] 2023-07-09 02:13:15,964 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub005:0/64] 2023-07-09 02:19:10,372 (trainer:732) INFO: 29epoch:train:2501-2600batch: iter_time=2.404, forward_time=0.165, loss_ctc=66.935, loss_att=50.456, acc=0.706, loss=55.400, backward_time=1.037, grad_norm=106.775, clip=100.000, loss_scale=1.934e+25, optim_step_time=0.185, optim0_lr0=6.772e-05, train_time=7.619
+[gpub005:0/64] 2023-07-09 02:21:26,766 (trainer:732) INFO: 29epoch:train:2601-2700batch: iter_time=1.113e-04, forward_time=0.146, loss_ctc=68.042, loss_att=50.061, acc=0.715, loss=55.456, backward_time=1.030, grad_norm=115.964, clip=100.000, loss_scale=1.934e+25, optim_step_time=0.183, optim0_lr0=6.771e-05, train_time=2.728
+[gpub005:0/64] 2023-07-09 02:23:45,668 (trainer:732) INFO: 29epoch:train:2701-2800batch: iter_time=1.113e-04, forward_time=0.144, loss_ctc=87.303, loss_att=56.217, acc=0.712, loss=65.543, backward_time=1.029, grad_norm=147.226, clip=100.000, loss_scale=1.934e+25, optim_step_time=0.183, optim0_lr0=6.770e-05, train_time=2.778
+[gpub005:0/64] 2023-07-09 02:26:01,790 (trainer:732) INFO: 29epoch:train:2801-2900batch: iter_time=1.210e-04, forward_time=0.146, loss_ctc=68.342, loss_att=46.773, acc=0.712, loss=53.244, backward_time=1.029, grad_norm=107.727, clip=100.000, loss_scale=1.934e+25, optim_step_time=0.183, optim0_lr0=6.768e-05, train_time=2.722
+[gpub005:0/64] 2023-07-09 02:28:17,591 (trainer:732) INFO: 29epoch:train:2901-3000batch: iter_time=1.213e-04, forward_time=0.145, loss_ctc=70.546, loss_att=55.696, acc=0.702, loss=60.151, backward_time=1.028, grad_norm=97.665, clip=100.000, loss_scale=1.934e+25, optim_step_time=0.183, optim0_lr0=6.767e-05, train_time=2.716
+[gpub005:0/64] 2023-07-09 02:30:33,597 (trainer:732) INFO: 29epoch:train:3001-3100batch: iter_time=1.164e-04, forward_time=0.145, loss_ctc=73.929, loss_att=58.503, acc=0.704, loss=63.131, backward_time=1.029, grad_norm=99.412, clip=100.000, loss_scale=1.934e+25, optim_step_time=0.183, optim0_lr0=6.766e-05, train_time=2.720
+[gpub005:0/64] 2023-07-09 02:32:49,257 (trainer:732) INFO: 29epoch:train:3101-3200batch: iter_time=1.203e-04, forward_time=0.146, loss_ctc=64.078, loss_att=51.000, acc=0.697, loss=54.924, backward_time=1.026, grad_norm=104.136, clip=100.000, loss_scale=1.934e+25, optim_step_time=0.183, optim0_lr0=6.765e-05, train_time=2.713
+[gpub005:0/64] 2023-07-09 02:35:05,104 (trainer:732) INFO: 29epoch:train:3201-3300batch: iter_time=1.141e-04, forward_time=0.145, loss_ctc=64.099, loss_att=47.265, acc=0.709, loss=52.315, backward_time=1.029, grad_norm=97.522, clip=100.000, loss_scale=1.934e+25, optim_step_time=0.183, optim0_lr0=6.764e-05, train_time=2.717
+[gpub005:0/64] 2023-07-09 02:35:54,763 (multiple_iter_factory:32) INFO: Building 4th iter-factory...
+[gpub005:0/64] 2023-07-09 02:36:12,774 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub005:0/64] 2023-07-09 02:36:16,518 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"}
+ preprocess: )
+[gpub005:0/64] 2023-07-09 02:36:16,518 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3,
+[gpub005:0/64] 2023-07-09 02:36:16,524 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub005:0/64] 2023-07-09 02:40:55,557 (trainer:732) INFO: 29epoch:train:3301-3400batch: iter_time=1.419, forward_time=0.149, loss_ctc=81.136, loss_att=66.941, acc=0.694, loss=71.200, backward_time=1.068, grad_norm=119.406, clip=100.000, loss_scale=1.934e+25, optim_step_time=0.183, optim0_lr0=6.762e-05, train_time=7.009
+[gpub005:0/64] 2023-07-09 02:43:16,133 (trainer:732) INFO: 29epoch:train:3401-3500batch: iter_time=1.141e-04, forward_time=0.145, loss_ctc=63.487, loss_att=49.185, acc=0.710, loss=53.476, backward_time=1.034, grad_norm=98.479, clip=100.000, loss_scale=1.934e+25, optim_step_time=0.183, optim0_lr0=6.761e-05, train_time=2.811
+[gpub005:0/64] 2023-07-09 02:45:32,121 (trainer:732) INFO: 29epoch:train:3501-3600batch: iter_time=1.180e-04, forward_time=0.145, loss_ctc=77.025, loss_att=55.124, acc=0.715, loss=61.694, backward_time=1.028, grad_norm=114.670, clip=100.000, loss_scale=1.934e+25, optim_step_time=0.183, optim0_lr0=6.760e-05, train_time=2.720
+[gpub005:0/64] 2023-07-09 02:47:47,521 (trainer:732) INFO: 29epoch:train:3601-3700batch: iter_time=1.214e-04, forward_time=0.145, loss_ctc=76.344, loss_att=46.095, acc=0.716, loss=55.170, backward_time=1.025, grad_norm=108.088, clip=100.000, loss_scale=1.934e+25, optim_step_time=0.183, optim0_lr0=6.759e-05, train_time=2.708
+[gpub005:0/64] 2023-07-09 02:50:03,361 (trainer:732) INFO: 29epoch:train:3701-3800batch: iter_time=1.249e-04, forward_time=0.146, loss_ctc=74.359, loss_att=57.845, acc=0.703, loss=62.799, backward_time=1.027, grad_norm=113.639, clip=100.000, loss_scale=1.934e+25, optim_step_time=0.183, optim0_lr0=6.757e-05, train_time=2.717
+[gpub005:0/64] 2023-07-09 02:52:18,965 (trainer:732) INFO: 29epoch:train:3801-3900batch: iter_time=1.165e-04, forward_time=0.145, loss_ctc=74.601, loss_att=55.697, acc=0.706, loss=61.368, backward_time=1.027, grad_norm=110.806, clip=100.000, loss_scale=1.934e+25, optim_step_time=0.183, optim0_lr0=6.756e-05, train_time=2.712
+[gpub005:0/64] 2023-07-09 02:54:49,718 (trainer:732) INFO: 29epoch:train:3901-4000batch: iter_time=1.248e-04, forward_time=0.147, loss_ctc=69.897, loss_att=54.034, acc=0.698, loss=58.793, backward_time=1.058, grad_norm=103.506, clip=100.000, loss_scale=1.934e+25, optim_step_time=0.183, optim0_lr0=6.755e-05, train_time=3.015
+[gpub005:0/64] 2023-07-09 02:57:08,751 (trainer:732) INFO: 29epoch:train:4001-4100batch: iter_time=1.225e-04, forward_time=0.146, loss_ctc=63.725, loss_att=50.310, acc=0.701, loss=54.335, backward_time=1.034, grad_norm=87.206, clip=100.000, loss_scale=1.934e+25, optim_step_time=0.183, optim0_lr0=6.754e-05, train_time=2.780
+[gpub005:0/64] 2023-07-09 02:58:49,008 (multiple_iter_factory:32) INFO: Building 5th iter-factory...
+[gpub005:0/64] 2023-07-09 02:59:07,100 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub005:0/64] 2023-07-09 02:59:10,536 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"}
+ preprocess: )
+[gpub005:0/64] 2023-07-09 02:59:10,536 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4,
+[gpub005:0/64] 2023-07-09 02:59:10,542 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub005:0/64] 2023-07-09 03:03:56,521 (trainer:732) INFO: 29epoch:train:4101-4200batch: iter_time=1.354, forward_time=0.145, loss_ctc=72.105, loss_att=56.202, acc=0.708, loss=60.973, backward_time=1.040, grad_norm=111.622, clip=100.000, loss_scale=1.934e+25, optim_step_time=0.183, optim0_lr0=6.752e-05, train_time=8.155
+[gpub005:0/64] 2023-07-09 03:06:13,202 (trainer:732) INFO: 29epoch:train:4201-4300batch: iter_time=1.213e-04, forward_time=0.145, loss_ctc=69.002, loss_att=53.608, acc=0.690, loss=58.226, backward_time=1.028, grad_norm=99.107, clip=100.000, loss_scale=1.934e+25, optim_step_time=0.183, optim0_lr0=6.751e-05, train_time=2.733
+[gpub005:0/64] 2023-07-09 03:08:28,913 (trainer:732) INFO: 29epoch:train:4301-4400batch: iter_time=1.178e-04, forward_time=0.145, loss_ctc=68.222, loss_att=49.890, acc=0.715, loss=55.390, backward_time=1.027, grad_norm=99.071, clip=100.000, loss_scale=1.934e+25, optim_step_time=0.183, optim0_lr0=6.750e-05, train_time=2.714
+[gpub005:0/64] 2023-07-09 03:10:44,538 (trainer:732) INFO: 29epoch:train:4401-4500batch: iter_time=1.173e-04, forward_time=0.145, loss_ctc=84.780, loss_att=56.108, acc=0.707, loss=64.710, backward_time=1.027, grad_norm=107.348, clip=100.000, loss_scale=1.934e+25, optim_step_time=0.183, optim0_lr0=6.749e-05, train_time=2.712
+[gpub005:0/64] 2023-07-09 03:13:02,169 (trainer:732) INFO: 29epoch:train:4501-4600batch: iter_time=1.167e-04, forward_time=0.145, loss_ctc=69.759, loss_att=47.626, acc=0.702, loss=54.266, backward_time=1.029, grad_norm=98.923, clip=100.000, loss_scale=1.934e+25, optim_step_time=0.183, optim0_lr0=6.747e-05, train_time=2.752
+[gpub005:0/64] 2023-07-09 03:15:18,719 (trainer:732) INFO: 29epoch:train:4601-4700batch: iter_time=1.185e-04, forward_time=0.145, loss_ctc=70.803, loss_att=56.571, acc=0.695, loss=60.841, backward_time=1.026, grad_norm=104.781, clip=100.000, loss_scale=1.934e+25, optim_step_time=0.183, optim0_lr0=6.746e-05, train_time=2.731
+[gpub005:0/64] 2023-07-09 03:17:34,404 (trainer:732) INFO: 29epoch:train:4701-4800batch: iter_time=1.086e-04, forward_time=0.145, loss_ctc=72.078, loss_att=57.762, acc=0.699, loss=62.057, backward_time=1.027, grad_norm=104.257, clip=100.000, loss_scale=1.934e+25, optim_step_time=0.183, optim0_lr0=6.745e-05, train_time=2.713
+[gpub005:0/64] 2023-07-09 03:19:52,007 (trainer:732) INFO: 29epoch:train:4801-4900batch: iter_time=1.130e-04, forward_time=0.146, loss_ctc=64.906, loss_att=51.504, acc=0.697, loss=55.525, backward_time=1.030, grad_norm=93.447, clip=100.000, loss_scale=1.934e+25, optim_step_time=0.183, optim0_lr0=6.744e-05, train_time=2.752
+[gpub005:0/64] 2023-07-09 03:22:08,209 (trainer:732) INFO: 29epoch:train:4901-5000batch: iter_time=1.110e-04, forward_time=0.145, loss_ctc=63.847, loss_att=49.021, acc=0.700, loss=53.469, backward_time=1.026, grad_norm=89.197, clip=100.000, loss_scale=1.934e+25, optim_step_time=0.183, optim0_lr0=6.743e-05, train_time=2.724
+[gpub005:0/64] 2023-07-09 03:22:16,127 (multiple_iter_factory:32) INFO: Building 6th iter-factory...
+[gpub005:0/64] 2023-07-09 03:22:34,650 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub005:0/64] 2023-07-09 03:22:38,116 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"}
+ preprocess: )
+[gpub005:0/64] 2023-07-09 03:22:38,116 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10,
+[gpub005:0/64] 2023-07-09 03:22:38,123 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub005:0/64] 2023-07-09 03:29:32,011 (trainer:732) INFO: 29epoch:train:5001-5100batch: iter_time=1.438, forward_time=0.167, loss_ctc=66.539, loss_att=51.321, acc=0.694, loss=55.886, backward_time=1.037, grad_norm=101.883, clip=100.000, loss_scale=1.934e+25, optim_step_time=0.184, optim0_lr0=6.741e-05, train_time=8.875
+[gpub005:0/64] 2023-07-09 03:31:48,082 (trainer:732) INFO: 29epoch:train:5101-5200batch: iter_time=1.251e-04, forward_time=0.145, loss_ctc=67.465, loss_att=49.395, acc=0.710, loss=54.816, backward_time=1.027, grad_norm=99.778, clip=100.000, loss_scale=1.934e+25, optim_step_time=0.183, optim0_lr0=6.740e-05, train_time=2.722
+[gpub005:0/64] 2023-07-09 03:34:04,172 (trainer:732) INFO: 29epoch:train:5201-5300batch: iter_time=1.185e-04, forward_time=0.145, loss_ctc=86.270, loss_att=55.902, acc=0.709, loss=65.012, backward_time=1.026, grad_norm=130.441, clip=100.000, loss_scale=1.934e+25, optim_step_time=0.183, optim0_lr0=6.739e-05, train_time=2.722
+[gpub005:0/64] 2023-07-09 03:36:19,829 (trainer:732) INFO: 29epoch:train:5301-5400batch: iter_time=1.055e-04, forward_time=0.147, loss_ctc=69.380, loss_att=47.502, acc=0.705, loss=54.065, backward_time=1.026, grad_norm=129.518, clip=100.000, loss_scale=1.934e+25, optim_step_time=0.183, optim0_lr0=6.738e-05, train_time=2.713
+[gpub005:0/64] 2023-07-09 03:38:36,943 (trainer:732) INFO: 29epoch:train:5401-5500batch: iter_time=1.046e-04, forward_time=0.148, loss_ctc=69.082, loss_att=55.354, acc=0.697, loss=59.473, backward_time=1.029, grad_norm=113.486, clip=100.000, loss_scale=1.934e+25, optim_step_time=0.183, optim0_lr0=6.736e-05, train_time=2.742
+[gpub005:0/64] 2023-07-09 03:40:53,445 (trainer:732) INFO: 29epoch:train:5501-5600batch: iter_time=1.094e-04, forward_time=0.147, loss_ctc=72.582, loss_att=57.397, acc=0.696, loss=61.952, backward_time=1.029, grad_norm=101.675, clip=100.000, loss_scale=1.934e+25, optim_step_time=0.183, optim0_lr0=6.735e-05, train_time=2.730
+[gpub005:0/64] 2023-07-09 03:43:09,072 (trainer:732) INFO: 29epoch:train:5601-5700batch: iter_time=1.116e-04, forward_time=0.146, loss_ctc=64.110, loss_att=50.796, acc=0.696, loss=54.790, backward_time=1.026, grad_norm=95.741, clip=100.000, loss_scale=1.934e+25, optim_step_time=0.183, optim0_lr0=6.734e-05, train_time=2.712
+[gpub005:0/64] 2023-07-09 03:45:25,401 (trainer:732) INFO: 29epoch:train:5701-5800batch: iter_time=1.137e-04, forward_time=0.145, loss_ctc=63.508, loss_att=48.524, acc=0.705, loss=53.020, backward_time=1.026, grad_norm=92.218, clip=100.000, loss_scale=1.934e+25, optim_step_time=0.183, optim0_lr0=6.733e-05, train_time=2.726
+[gpub005:0/64] 2023-07-09 03:46:27,381 (multiple_iter_factory:32) INFO: Building 7th iter-factory...
+[gpub005:0/64] 2023-07-09 03:46:45,495 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub005:0/64] 2023-07-09 03:46:49,010 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"}
+ preprocess: )
+[gpub005:0/64] 2023-07-09 03:46:49,010 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2,
+[gpub005:0/64] 2023-07-09 03:46:49,016 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub005:0/64] 2023-07-09 03:53:18,837 (trainer:732) INFO: 29epoch:train:5801-5900batch: iter_time=2.783, forward_time=0.147, loss_ctc=79.436, loss_att=67.417, acc=0.689, loss=71.022, backward_time=1.047, grad_norm=117.828, clip=100.000, loss_scale=1.934e+25, optim_step_time=0.183, optim0_lr0=6.732e-05, train_time=9.468
+[gpub005:0/64] 2023-07-09 03:55:34,614 (trainer:732) INFO: 29epoch:train:5901-6000batch: iter_time=1.240e-04, forward_time=0.145, loss_ctc=62.841, loss_att=48.708, acc=0.705, loss=52.948, backward_time=1.026, grad_norm=98.596, clip=100.000, loss_scale=1.934e+25, optim_step_time=0.183, optim0_lr0=6.730e-05, train_time=2.715
+[gpub005:0/64] 2023-07-09 03:57:50,427 (trainer:732) INFO: 29epoch:train:6001-6100batch: iter_time=1.114e-04, forward_time=0.145, loss_ctc=76.795, loss_att=55.754, acc=0.710, loss=62.066, backward_time=1.024, grad_norm=125.132, clip=100.000, loss_scale=3.869e+25, optim_step_time=0.183, optim0_lr0=6.729e-05, train_time=2.716
+[gpub005:0/64] 2023-07-09 04:00:06,297 (trainer:732) INFO: 29epoch:train:6101-6200batch: iter_time=1.130e-04, forward_time=0.146, loss_ctc=76.623, loss_att=46.076, acc=0.713, loss=55.240, backward_time=1.026, grad_norm=135.922, clip=100.000, loss_scale=3.869e+25,
optim_step_time=0.183, optim0_lr0=6.728e-05, train_time=2.717 +[gpub005:0/64] 2023-07-09 04:02:23,184 (trainer:732) INFO: 29epoch:train:6201-6300batch: iter_time=1.152e-04, forward_time=0.146, loss_ctc=73.911, loss_att=58.019, acc=0.695, loss=62.787, backward_time=1.028, grad_norm=106.548, clip=100.000, loss_scale=3.869e+25, optim_step_time=0.183, optim0_lr0=6.727e-05, train_time=2.738 +[gpub005:0/64] 2023-07-09 04:04:41,945 (trainer:732) INFO: 29epoch:train:6301-6400batch: iter_time=1.209e-04, forward_time=0.146, loss_ctc=73.690, loss_att=55.703, acc=0.702, loss=61.099, backward_time=1.028, grad_norm=92.974, clip=100.000, loss_scale=3.869e+25, optim_step_time=0.183, optim0_lr0=6.725e-05, train_time=2.775 +[gpub005:0/64] 2023-07-09 04:07:02,114 (trainer:732) INFO: 29epoch:train:6401-6500batch: iter_time=1.200e-04, forward_time=0.145, loss_ctc=69.683, loss_att=53.819, acc=0.693, loss=58.578, backward_time=1.030, grad_norm=107.754, clip=100.000, loss_scale=3.869e+25, optim_step_time=0.183, optim0_lr0=6.724e-05, train_time=2.803 +[gpub005:0/64] 2023-07-09 04:09:24,271 (trainer:732) INFO: 29epoch:train:6501-6600batch: iter_time=1.200e-04, forward_time=0.146, loss_ctc=62.912, loss_att=49.656, acc=0.700, loss=53.633, backward_time=1.051, grad_norm=93.183, clip=100.000, loss_scale=3.869e+25, optim_step_time=0.183, optim0_lr0=6.723e-05, train_time=2.843 +[gpub005:0/64] 2023-07-09 04:10:56,904 (multiple_iter_factory:32) INFO: Building 8th iter-factory... +[gpub005:0/64] 2023-07-09 04:11:14,983 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub005:0/64] 2023-07-09 04:11:18,376 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"} + preprocess: ) +[gpub005:0/64] 2023-07-09 04:11:18,376 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5, +[gpub005:0/64] 2023-07-09 04:11:18,383 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub005:0/64] 2023-07-09 04:15:17,356 (trainer:732) INFO: 29epoch:train:6601-6700batch: iter_time=1.432, forward_time=0.150, loss_ctc=71.020, loss_att=55.879, acc=0.709, loss=60.421, backward_time=1.057, grad_norm=109.386, clip=100.000, loss_scale=3.869e+25, optim_step_time=0.183, optim0_lr0=6.722e-05, train_time=7.061 +[gpub005:0/64] 2023-07-09 04:17:34,269 (trainer:732) INFO: 29epoch:train:6701-6800batch: iter_time=1.170e-04, forward_time=0.147, loss_ctc=67.619, loss_att=52.545, acc=0.704, loss=57.067, backward_time=1.032, grad_norm=94.824, clip=100.000, loss_scale=3.869e+25, optim_step_time=0.183, optim0_lr0=6.721e-05, train_time=2.738 +[gpub005:0/64] 2023-07-09 04:19:50,475 (trainer:732) INFO: 29epoch:train:6801-6900batch: iter_time=1.284e-04, forward_time=0.146, loss_ctc=67.948, loss_att=51.387, acc=0.714, loss=56.355, backward_time=1.030, grad_norm=96.289, clip=100.000, loss_scale=3.869e+25, optim_step_time=0.183, optim0_lr0=6.719e-05, train_time=2.724 +[gpub005:0/64] 2023-07-09 04:22:06,494 (trainer:732) INFO: 29epoch:train:6901-7000batch: iter_time=1.305e-04, forward_time=0.147, 
loss_ctc=84.714, loss_att=54.294, acc=0.723, loss=63.420, backward_time=1.030, grad_norm=101.845, clip=100.000, loss_scale=3.869e+25, optim_step_time=0.183, optim0_lr0=6.718e-05, train_time=2.720 +[gpub005:0/64] 2023-07-09 04:24:22,311 (trainer:732) INFO: 29epoch:train:7001-7100batch: iter_time=1.207e-04, forward_time=0.146, loss_ctc=69.271, loss_att=47.599, acc=0.712, loss=54.101, backward_time=1.026, grad_norm=120.928, clip=100.000, loss_scale=3.869e+25, optim_step_time=0.183, optim0_lr0=6.717e-05, train_time=2.716 +[gpub005:0/64] 2023-07-09 04:26:38,042 (trainer:732) INFO: 29epoch:train:7101-7200batch: iter_time=1.078e-04, forward_time=0.145, loss_ctc=69.513, loss_att=54.422, acc=0.710, loss=58.949, backward_time=1.027, grad_norm=110.610, clip=100.000, loss_scale=3.869e+25, optim_step_time=0.183, optim0_lr0=6.716e-05, train_time=2.714 +[gpub005:0/64] 2023-07-09 04:28:54,309 (trainer:732) INFO: 29epoch:train:7201-7300batch: iter_time=1.170e-04, forward_time=0.147, loss_ctc=72.193, loss_att=57.718, acc=0.709, loss=62.061, backward_time=1.030, grad_norm=102.662, clip=100.000, loss_scale=3.869e+25, optim_step_time=0.183, optim0_lr0=6.715e-05, train_time=2.725 +[gpub005:0/64] 2023-07-09 04:31:09,724 (trainer:732) INFO: 29epoch:train:7301-7400batch: iter_time=1.112e-04, forward_time=0.146, loss_ctc=66.020, loss_att=51.633, acc=0.701, loss=55.949, backward_time=1.025, grad_norm=97.883, clip=100.000, loss_scale=3.869e+25, optim_step_time=0.183, optim0_lr0=6.713e-05, train_time=2.708 +[gpub005:0/64] 2023-07-09 04:33:26,175 (trainer:732) INFO: 29epoch:train:7401-7500batch: iter_time=1.074e-04, forward_time=0.147, loss_ctc=62.895, loss_att=47.361, acc=0.711, loss=52.021, backward_time=1.027, grad_norm=95.711, clip=100.000, loss_scale=3.869e+25, optim_step_time=0.183, optim0_lr0=6.712e-05, train_time=2.729 +[gpub005:0/64] 2023-07-09 04:33:28,013 (multiple_iter_factory:32) INFO: Building 9th iter-factory... 
+[gpub005:0/64] 2023-07-09 04:33:46,250 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub005:0/64] 2023-07-09 04:33:49,686 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"} + preprocess: ) +[gpub005:0/64] 2023-07-09 04:33:49,686 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6, +[gpub005:0/64] 2023-07-09 04:33:49,720 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub005:0/64] 2023-07-09 04:39:35,421 (trainer:732) INFO: 29epoch:train:7501-7600batch: iter_time=1.272, forward_time=0.187, loss_ctc=73.581, loss_att=59.664, acc=0.699, loss=63.839, backward_time=1.051, grad_norm=123.090, clip=100.000, loss_scale=3.869e+25, optim_step_time=0.185, optim0_lr0=6.711e-05, train_time=7.385 +[gpub005:0/64] 2023-07-09 04:41:51,424 (trainer:732) INFO: 29epoch:train:7601-7700batch: iter_time=1.149e-04, forward_time=0.147, loss_ctc=66.484, loss_att=50.773, acc=0.706, loss=55.486, backward_time=1.027, grad_norm=101.249, clip=100.000, loss_scale=3.869e+25, optim_step_time=0.183, optim0_lr0=6.710e-05, train_time=2.720 +[gpub005:0/64] 2023-07-09 04:44:13,765 (trainer:732) INFO: 29epoch:train:7701-7800batch: iter_time=1.341e-04, forward_time=0.146, loss_ctc=84.648, loss_att=54.865, acc=0.710, loss=63.799, backward_time=1.032, grad_norm=114.104, clip=100.000, loss_scale=3.869e+25, optim_step_time=0.183, optim0_lr0=6.709e-05, train_time=2.847 +[gpub005:0/64] 2023-07-09 04:46:32,054 (trainer:732) INFO: 29epoch:train:7801-7900batch: iter_time=1.111e-04, forward_time=0.147, loss_ctc=69.048, loss_att=47.959, acc=0.715, loss=54.286, backward_time=1.040, grad_norm=97.389, clip=100.000, loss_scale=3.869e+25, optim_step_time=0.183, optim0_lr0=6.707e-05, train_time=2.766 +[gpub005:0/64] 2023-07-09 04:48:49,620 (trainer:732) INFO: 29epoch:train:7901-8000batch: iter_time=1.106e-04, forward_time=0.147, loss_ctc=72.274, loss_att=55.314, acc=0.692, loss=60.402, backward_time=1.028, grad_norm=137.366, clip=100.000, loss_scale=3.869e+25, optim_step_time=0.183, optim0_lr0=6.706e-05, train_time=2.751 +[gpub005:0/64] 2023-07-09 04:51:28,616 (trainer:732) INFO: 29epoch:train:8001-8100batch: iter_time=1.309e-04, forward_time=0.147, loss_ctc=69.384, loss_att=53.865, acc=0.708, loss=58.521, backward_time=1.057, grad_norm=96.316, clip=100.000, loss_scale=3.869e+25, optim_step_time=0.183, optim0_lr0=6.705e-05, train_time=3.180 +[gpub005:0/64] 2023-07-09 04:53:45,027 (trainer:732) INFO: 29epoch:train:8101-8200batch: iter_time=1.360e-04, forward_time=0.147, loss_ctc=69.389, loss_att=54.583, acc=0.692, loss=59.025, backward_time=1.030, grad_norm=99.264, clip=100.000, loss_scale=3.869e+25, optim_step_time=0.183, optim0_lr0=6.704e-05, train_time=2.728 +[gpub005:0/64] 2023-07-09 04:56:00,319 (trainer:732) INFO: 29epoch:train:8201-8300batch: iter_time=1.142e-04, forward_time=0.143, loss_ctc=58.213, loss_att=41.467, acc=0.724, loss=46.491, backward_time=1.023, grad_norm=83.113, clip=100.000, loss_scale=3.869e+25, optim_step_time=0.183, 
optim0_lr0=6.702e-05, train_time=2.706 +[gpub005:0/64] 2023-07-09 04:56:51,431 (multiple_iter_factory:32) INFO: Building 10th iter-factory... +[gpub005:0/64] 2023-07-09 04:57:09,723 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub005:0/64] 2023-07-09 04:57:13,145 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"} + preprocess: ) +[gpub005:0/64] 2023-07-09 04:57:13,145 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0, +[gpub005:0/64] 2023-07-09 04:57:13,152 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub005:0/64] 2023-07-09 05:01:29,095 (trainer:732) INFO: 29epoch:train:8301-8400batch: iter_time=1.321, forward_time=0.146, loss_ctc=77.823, loss_att=61.892, acc=0.697, loss=66.671, backward_time=1.041, grad_norm=131.315, clip=100.000, loss_scale=3.869e+25, optim_step_time=0.183, optim0_lr0=6.701e-05, train_time=6.575 +[gpub005:0/64] 2023-07-09 05:03:46,371 (trainer:732) INFO: 29epoch:train:8401-8500batch: iter_time=1.200e-04, forward_time=0.146, loss_ctc=62.357, loss_att=49.168, acc=0.702, loss=53.124, backward_time=1.028, grad_norm=114.769, clip=100.000, loss_scale=3.869e+25, optim_step_time=0.183, optim0_lr0=6.700e-05, train_time=2.745 +[gpub005:0/64] 2023-07-09 05:06:02,208 (trainer:732) INFO: 29epoch:train:8501-8600batch: iter_time=1.148e-04, forward_time=0.146, loss_ctc=76.933, loss_att=55.223, acc=0.716, loss=61.736, backward_time=1.029, grad_norm=107.873, clip=100.000, loss_scale=3.869e+25, optim_step_time=0.183, optim0_lr0=6.699e-05, train_time=2.717 +[gpub005:0/64] 2023-07-09 05:08:17,804 (trainer:732) INFO: 29epoch:train:8601-8700batch: iter_time=1.178e-04, forward_time=0.145, loss_ctc=75.982, loss_att=45.898, acc=0.712, loss=54.923, backward_time=1.026, grad_norm=112.216, clip=100.000, loss_scale=3.869e+25, optim_step_time=0.183, optim0_lr0=6.698e-05, train_time=2.712 +[gpub005:0/64] 2023-07-09 05:10:33,845 (trainer:732) INFO: 29epoch:train:8701-8800batch: iter_time=1.255e-04, forward_time=0.147, loss_ctc=74.836, loss_att=57.671, acc=0.698, loss=62.820, backward_time=1.029, grad_norm=99.814, clip=100.000, loss_scale=3.869e+25, optim_step_time=0.183, optim0_lr0=6.696e-05, train_time=2.721 +[gpub005:0/64] 2023-07-09 05:12:49,571 (trainer:732) INFO: 29epoch:train:8801-8900batch: iter_time=1.217e-04, forward_time=0.146, loss_ctc=72.233, loss_att=54.973, acc=0.705, loss=60.151, backward_time=1.027, grad_norm=95.784, clip=100.000, loss_scale=3.869e+25, optim_step_time=0.183, optim0_lr0=6.695e-05, train_time=2.714 +[gpub005:0/64] 2023-07-09 05:15:05,314 (trainer:732) INFO: 29epoch:train:8901-9000batch: iter_time=1.143e-04, forward_time=0.146, loss_ctc=67.982, loss_att=52.959, acc=0.698, loss=57.466, backward_time=1.026, grad_norm=94.407, clip=100.000, loss_scale=3.869e+25, optim_step_time=0.183, optim0_lr0=6.694e-05, train_time=2.715 +[gpub005:0/64] 2023-07-09 05:17:20,925 (trainer:732) INFO: 29epoch:train:9001-9100batch: iter_time=1.145e-04, forward_time=0.145, loss_ctc=63.000, loss_att=48.975, 
acc=0.703, loss=53.183, backward_time=1.027, grad_norm=93.871, clip=100.000, loss_scale=3.869e+25, optim_step_time=0.183, optim0_lr0=6.693e-05, train_time=2.712 +[gpub005:0/64] 2023-07-09 05:18:51,733 (multiple_iter_factory:32) INFO: Building 11th iter-factory... +[gpub005:0/64] 2023-07-09 05:19:10,033 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub005:0/64] 2023-07-09 05:19:13,693 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"} + preprocess: ) +[gpub005:0/64] 2023-07-09 05:19:13,693 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8, +[gpub005:0/64] 2023-07-09 05:19:13,699 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub005:0/64] 2023-07-09 05:22:38,222 (trainer:732) INFO: 29epoch:train:9101-9200batch: iter_time=1.288, forward_time=0.145, loss_ctc=63.360, loss_att=48.019, acc=0.710, loss=52.621, backward_time=1.038, grad_norm=99.747, clip=100.000, loss_scale=3.869e+25, optim_step_time=0.183, optim0_lr0=6.692e-05, train_time=6.346 +[gpub005:0/64] 2023-07-09 05:24:58,949 (trainer:732) INFO: 29epoch:train:9201-9300batch: iter_time=1.223e-04, forward_time=0.146, loss_ctc=62.760, loss_att=47.716, acc=0.709, loss=52.229, backward_time=1.046, grad_norm=108.849, clip=100.000, loss_scale=3.869e+25, optim_step_time=0.183, optim0_lr0=6.690e-05, train_time=2.814 +[gpub005:0/64] 2023-07-09 05:27:21,659 (trainer:732) INFO: 29epoch:train:9301-9400batch: iter_time=1.041e-04, forward_time=0.146, loss_ctc=74.976, loss_att=52.987, acc=0.714, loss=59.583, backward_time=1.067, grad_norm=117.382, clip=100.000, loss_scale=3.869e+25, optim_step_time=0.183, optim0_lr0=6.689e-05, train_time=2.854 +[gpub005:0/64] 2023-07-09 05:29:40,539 (trainer:732) INFO: 29epoch:train:9401-9500batch: iter_time=1.213e-04, forward_time=0.145, loss_ctc=76.575, loss_att=47.980, acc=0.712, loss=56.559, backward_time=1.030, grad_norm=107.961, clip=100.000, loss_scale=3.869e+25, optim_step_time=0.183, optim0_lr0=6.688e-05, train_time=2.777 +[gpub005:0/64] 2023-07-09 05:31:56,436 (trainer:732) INFO: 29epoch:train:9501-9600batch: iter_time=1.186e-04, forward_time=0.146, loss_ctc=70.340, loss_att=56.056, acc=0.697, loss=60.341, backward_time=1.027, grad_norm=106.273, clip=100.000, loss_scale=3.869e+25, optim_step_time=0.183, optim0_lr0=6.687e-05, train_time=2.718 +[gpub005:0/64] 2023-07-09 05:34:12,843 (trainer:732) INFO: 29epoch:train:9601-9700batch: iter_time=1.046e-04, forward_time=0.146, loss_ctc=71.444, loss_att=54.036, acc=0.701, loss=59.259, backward_time=1.028, grad_norm=99.944, clip=100.000, loss_scale=3.869e+25, optim_step_time=0.183, optim0_lr0=6.686e-05, train_time=2.728 +[gpub005:0/64] 2023-07-09 05:36:29,650 (trainer:732) INFO: 29epoch:train:9701-9800batch: iter_time=1.158e-04, forward_time=0.152, loss_ctc=68.773, loss_att=52.287, acc=0.698, loss=57.233, backward_time=1.027, grad_norm=110.662, clip=100.000, loss_scale=3.869e+25, optim_step_time=0.183, optim0_lr0=6.684e-05, train_time=2.736 +[gpub005:0/64] 2023-07-09 05:38:48,684 
(trainer:732) INFO: 29epoch:train:9801-9900batch: iter_time=1.102e-04, forward_time=0.146, loss_ctc=64.624, loss_att=52.273, acc=0.696, loss=55.979, backward_time=1.028, grad_norm=103.082, clip=100.000, loss_scale=3.869e+25, optim_step_time=0.183, optim0_lr0=6.683e-05, train_time=2.780 +[gpub005:0/64] 2023-07-09 05:41:10,992 (trainer:732) INFO: 29epoch:train:9901-10000batch: iter_time=1.091e-04, forward_time=0.145, loss_ctc=67.638, loss_att=54.649, acc=0.699, loss=58.546, backward_time=1.032, grad_norm=92.414, clip=100.000, loss_scale=3.869e+25, optim_step_time=0.183, optim0_lr0=6.682e-05, train_time=2.846 +[gpub005:0/64] 2023-07-09 05:55:00,842 (trainer:338) INFO: 29epoch results: [train] iter_time=0.204, forward_time=0.147, loss_ctc=70.995, loss_att=53.088, acc=0.704, loss=58.460, backward_time=1.033, grad_norm=107.676, clip=100.000, loss_scale=2.515e+25, optim_step_time=0.183, optim0_lr0=6.742e-05, train_time=3.358, time=4 hours, 40 minutes and 6.74 seconds, total_count=260000, gpu_max_cached_mem_GB=38.234, [valid] loss_ctc=48.134, cer_ctc=0.273, loss_att=39.724, acc=0.683, cer=0.360, wer=0.992, loss=42.247, time=7 minutes and 46.79 seconds, total_count=26818, gpu_max_cached_mem_GB=38.234, [att_plot] time=5 minutes and 49.35 seconds, total_count=0, gpu_max_cached_mem_GB=38.234 +[gpub005:0/64] 2023-07-09 05:55:19,622 (trainer:386) INFO: The best model has been updated: valid.acc, valid.total_count +[gpub005:0/64] 2023-07-09 05:55:19,855 (trainer:440) INFO: The model files were removed: exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/18epoch.pth +[gpub005:0/64] 2023-07-09 05:55:19,907 (trainer:272) INFO: 30/30epoch started. Estimated time to finish: 4 hours, 49 minutes and 24.9 seconds +[gpub005:0/64] 2023-07-09 05:55:21,146 (multiple_iter_factory:32) INFO: Building 0th iter-factory... 
+[gpub005:0/64] 2023-07-09 05:55:40,193 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub005:0/64] 2023-07-09 05:55:43,757 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"} + preprocess: ) +[gpub005:0/64] 2023-07-09 05:55:43,757 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0, +[gpub005:0/64] 2023-07-09 05:55:43,848 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub005:0/64] 2023-07-09 06:02:34,500 (trainer:732) INFO: 30epoch:train:1-100batch: iter_time=2.918, forward_time=0.176, loss_ctc=81.654, loss_att=67.808, acc=0.681, loss=71.962, backward_time=1.043, grad_norm=106.248, clip=100.000, loss_scale=7.737e+25, optim_step_time=0.185, optim0_lr0=6.681e-05, train_time=8.680 +[gpub005:0/64] 2023-07-09 06:04:53,428 (trainer:732) INFO: 30epoch:train:101-200batch: iter_time=1.164e-04, forward_time=0.145, loss_ctc=70.159, loss_att=56.442, acc=0.698, loss=60.557, backward_time=1.033, grad_norm=116.749, clip=100.000, loss_scale=7.737e+25, optim_step_time=0.183, optim0_lr0=6.680e-05, train_time=2.778 +[gpub005:0/64] 2023-07-09 06:07:19,263 (trainer:732) INFO: 30epoch:train:201-300batch: iter_time=1.039e-04, forward_time=0.144, loss_ctc=75.061, loss_att=51.507, acc=0.692, loss=58.573, backward_time=1.042, grad_norm=111.019, clip=100.000, loss_scale=7.737e+25, optim_step_time=0.183, optim0_lr0=6.679e-05, train_time=2.916 +[gpub005:0/64] 2023-07-09 06:09:35,049 (trainer:732) INFO: 30epoch:train:301-400batch: iter_time=1.042e-04, forward_time=0.144, loss_ctc=69.663, loss_att=53.432, acc=0.703, loss=58.301, backward_time=1.029, grad_norm=130.261, clip=100.000, loss_scale=7.737e+25, optim_step_time=0.183, optim0_lr0=6.677e-05, train_time=2.716 +[gpub005:0/64] 2023-07-09 06:11:56,677 (trainer:732) INFO: 30epoch:train:401-500batch: iter_time=1.208e-04, forward_time=0.144, loss_ctc=79.996, loss_att=60.102, acc=0.682, loss=66.070, backward_time=1.054, grad_norm=137.978, clip=100.000, loss_scale=7.737e+25, optim_step_time=0.183, optim0_lr0=6.676e-05, train_time=2.832 +[gpub005:0/64] 2023-07-09 06:14:12,035 (trainer:732) INFO: 30epoch:train:501-600batch: iter_time=1.345e-04, forward_time=0.144, loss_ctc=75.063, loss_att=53.414, acc=0.695, loss=59.909, backward_time=1.026, grad_norm=97.590, clip=100.000, loss_scale=7.737e+25, optim_step_time=0.183, optim0_lr0=6.675e-05, train_time=2.707 +[gpub005:0/64] 2023-07-09 06:16:30,642 (trainer:732) INFO: 30epoch:train:601-700batch: iter_time=1.235e-04, forward_time=0.145, loss_ctc=72.761, loss_att=55.961, acc=0.699, loss=61.001, backward_time=1.029, grad_norm=115.744, clip=100.000, loss_scale=7.737e+25, optim_step_time=0.183, optim0_lr0=6.674e-05, train_time=2.772 +[gpub005:0/64] 2023-07-09 06:18:50,359 (trainer:732) INFO: 30epoch:train:701-800batch: iter_time=1.281e-04, forward_time=0.145, loss_ctc=78.518, loss_att=61.887, acc=0.700, loss=66.876, backward_time=1.035, grad_norm=107.789, clip=100.000, loss_scale=7.737e+25, optim_step_time=0.183, optim0_lr0=6.673e-05, 
train_time=2.794 +[gpub005:0/64] 2023-07-09 06:19:41,326 (multiple_iter_factory:32) INFO: Building 1th iter-factory... +[gpub005:0/64] 2023-07-09 06:19:59,097 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub005:0/64] 2023-07-09 06:20:02,459 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"} + preprocess: ) +[gpub005:0/64] 2023-07-09 06:20:02,459 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1, +[gpub005:0/64] 2023-07-09 06:20:02,465 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub005:0/64] 2023-07-09 06:24:30,746 (trainer:732) INFO: 30epoch:train:801-900batch: iter_time=1.428, forward_time=0.152, loss_ctc=78.646, loss_att=64.512, acc=0.695, loss=68.752, backward_time=1.044, grad_norm=116.800, clip=100.000, loss_scale=7.737e+25, optim_step_time=0.183, optim0_lr0=6.671e-05, train_time=6.808 +[gpub005:0/64] 2023-07-09 06:26:47,705 (trainer:732) INFO: 30epoch:train:901-1000batch: iter_time=1.117e-04, forward_time=0.145, loss_ctc=70.264, loss_att=56.691, acc=0.706, loss=60.763, backward_time=1.030, grad_norm=100.914, clip=100.000, loss_scale=7.737e+25, optim_step_time=0.183, optim0_lr0=6.670e-05, train_time=2.739 +[gpub005:0/64] 2023-07-09 06:29:04,083 (trainer:732) INFO: 30epoch:train:1001-1100batch: iter_time=1.106e-04, forward_time=0.145, loss_ctc=72.147, loss_att=55.265, acc=0.704, loss=60.330, backward_time=1.028, grad_norm=115.205, clip=100.000, loss_scale=7.737e+25, optim_step_time=0.183, optim0_lr0=6.669e-05, train_time=2.727 +[gpub005:0/64] 2023-07-09 06:31:19,765 (trainer:732) INFO: 30epoch:train:1101-1200batch: iter_time=1.295e-04, forward_time=0.145, loss_ctc=71.106, loss_att=51.456, acc=0.709, loss=57.351, backward_time=1.027, grad_norm=110.706, clip=100.000, loss_scale=7.737e+25, optim_step_time=0.183, optim0_lr0=6.668e-05, train_time=2.713 +[gpub005:0/64] 2023-07-09 06:33:35,674 (trainer:732) INFO: 30epoch:train:1201-1300batch: iter_time=1.348e-04, forward_time=0.145, loss_ctc=73.058, loss_att=55.161, acc=0.705, loss=60.531, backward_time=1.028, grad_norm=104.330, clip=100.000, loss_scale=7.737e+25, optim_step_time=0.183, optim0_lr0=6.667e-05, train_time=2.718 +[gpub005:0/64] 2023-07-09 06:35:51,423 (trainer:732) INFO: 30epoch:train:1301-1400batch: iter_time=1.195e-04, forward_time=0.146, loss_ctc=74.348, loss_att=54.096, acc=0.690, loss=60.172, backward_time=1.027, grad_norm=101.529, clip=100.000, loss_scale=7.737e+25, optim_step_time=0.183, optim0_lr0=6.665e-05, train_time=2.715 +[gpub005:0/64] 2023-07-09 06:38:07,358 (trainer:732) INFO: 30epoch:train:1401-1500batch: iter_time=1.162e-04, forward_time=0.147, loss_ctc=66.464, loss_att=52.839, acc=0.721, loss=56.927, backward_time=1.028, grad_norm=90.007, clip=100.000, loss_scale=7.737e+25, optim_step_time=0.183, optim0_lr0=6.664e-05, train_time=2.718 +[gpub005:0/64] 2023-07-09 06:40:22,816 (trainer:732) INFO: 30epoch:train:1501-1600batch: iter_time=1.294e-04, forward_time=0.145, loss_ctc=82.214, loss_att=60.679, acc=0.706, loss=67.140, 
backward_time=1.026, grad_norm=109.111, clip=100.000, loss_scale=7.737e+25, optim_step_time=0.183, optim0_lr0=6.663e-05, train_time=2.709 +[gpub005:0/64] 2023-07-09 06:41:54,164 (multiple_iter_factory:32) INFO: Building 2th iter-factory... +[gpub005:0/64] 2023-07-09 06:42:12,754 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub005:0/64] 2023-07-09 06:42:16,188 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"} + preprocess: ) +[gpub005:0/64] 2023-07-09 06:42:16,188 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8, +[gpub005:0/64] 2023-07-09 06:42:16,194 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub005:0/64] 2023-07-09 06:46:07,824 (trainer:732) INFO: 30epoch:train:1601-1700batch: iter_time=1.414, forward_time=0.155, loss_ctc=72.256, loss_att=57.905, acc=0.707, loss=62.210, backward_time=1.043, grad_norm=109.824, clip=100.000, loss_scale=7.737e+25, optim_step_time=0.184, optim0_lr0=6.662e-05, train_time=6.900 +[gpub005:0/64] 2023-07-09 06:48:25,273 (trainer:732) INFO: 30epoch:train:1701-1800batch: iter_time=1.219e-04, forward_time=0.146, loss_ctc=71.125, loss_att=56.929, acc=0.694, loss=61.188, backward_time=1.037, grad_norm=93.078, clip=100.000, loss_scale=7.737e+25, optim_step_time=0.183, optim0_lr0=6.661e-05, train_time=2.749 +[gpub005:0/64] 2023-07-09 06:50:41,031 (trainer:732) INFO: 30epoch:train:1801-1900batch: iter_time=1.278e-04, forward_time=0.146, loss_ctc=68.578, loss_att=54.880, acc=0.697, loss=58.990, backward_time=1.028, grad_norm=105.880, clip=100.000, loss_scale=7.737e+25, optim_step_time=0.183, optim0_lr0=6.660e-05, train_time=2.715 +[gpub005:0/64] 2023-07-09 06:52:57,455 (trainer:732) INFO: 30epoch:train:1901-2000batch: iter_time=0.001, forward_time=0.148, loss_ctc=71.582, loss_att=53.088, acc=0.699, loss=58.636, backward_time=1.028, grad_norm=122.721, clip=100.000, loss_scale=7.737e+25, optim_step_time=0.183, optim0_lr0=6.658e-05, train_time=2.728 +[gpub005:0/64] 2023-07-09 06:55:13,258 (trainer:732) INFO: 30epoch:train:2001-2100batch: iter_time=1.386e-04, forward_time=0.146, loss_ctc=73.218, loss_att=56.105, acc=0.697, loss=61.239, backward_time=1.028, grad_norm=100.719, clip=100.000, loss_scale=7.737e+25, optim_step_time=0.183, optim0_lr0=6.657e-05, train_time=2.716 +[gpub005:0/64] 2023-07-09 06:57:30,072 (trainer:732) INFO: 30epoch:train:2101-2200batch: iter_time=6.216e-04, forward_time=0.150, loss_ctc=74.433, loss_att=53.049, acc=0.690, loss=59.464, backward_time=1.031, grad_norm=103.823, clip=100.000, loss_scale=7.737e+25, optim_step_time=0.183, optim0_lr0=6.656e-05, train_time=2.734 +[gpub005:0/64] 2023-07-09 06:59:47,604 (trainer:732) INFO: 30epoch:train:2201-2300batch: iter_time=6.988e-04, forward_time=0.161, loss_ctc=68.122, loss_att=55.081, acc=0.701, loss=58.993, backward_time=1.028, grad_norm=102.516, clip=100.000, loss_scale=7.737e+25, optim_step_time=0.184, optim0_lr0=6.655e-05, train_time=2.752 +[gpub005:0/64] 2023-07-09 07:02:03,279 (trainer:732) INFO: 
30epoch:train:2301-2400batch: iter_time=9.358e-04, forward_time=0.146, loss_ctc=81.052, loss_att=59.227, acc=0.699, loss=65.775, backward_time=1.027, grad_norm=123.594, clip=100.000, loss_scale=7.737e+25, optim_step_time=0.183, optim0_lr0=6.654e-05, train_time=2.713 +[gpub005:0/64] 2023-07-09 07:04:18,965 (trainer:732) INFO: 30epoch:train:2401-2500batch: iter_time=1.129e-04, forward_time=0.146, loss_ctc=70.548, loss_att=54.052, acc=0.711, loss=59.001, backward_time=1.027, grad_norm=110.402, clip=100.000, loss_scale=7.737e+25, optim_step_time=0.183, optim0_lr0=6.652e-05, train_time=2.713 +[gpub005:0/64] 2023-07-09 07:04:24,292 (multiple_iter_factory:32) INFO: Building 3th iter-factory... +[gpub005:0/64] 2023-07-09 07:04:42,292 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub005:0/64] 2023-07-09 07:04:45,699 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"} + preprocess: ) +[gpub005:0/64] 2023-07-09 07:04:45,699 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9, +[gpub005:0/64] 2023-07-09 07:04:45,777 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub005:0/64] 2023-07-09 07:11:21,417 (trainer:732) INFO: 30epoch:train:2501-2600batch: iter_time=1.621, forward_time=0.147, loss_ctc=77.350, loss_att=59.850, acc=0.713, loss=65.100, backward_time=1.046, grad_norm=102.810, clip=100.000, loss_scale=7.737e+25, optim_step_time=0.183, optim0_lr0=6.651e-05, train_time=8.449 +[gpub005:0/64] 2023-07-09 07:13:37,716 (trainer:732) INFO: 30epoch:train:2601-2700batch: iter_time=1.199e-04, forward_time=0.147, loss_ctc=64.507, loss_att=51.570, acc=0.715, loss=55.451, backward_time=1.029, grad_norm=104.183, clip=100.000, loss_scale=7.737e+25, optim_step_time=0.183, optim0_lr0=6.650e-05, train_time=2.726 +[gpub005:0/64] 2023-07-09 07:15:54,008 (trainer:732) INFO: 30epoch:train:2701-2800batch: iter_time=1.175e-04, forward_time=0.145, loss_ctc=75.266, loss_att=54.346, acc=0.696, loss=60.622, backward_time=1.032, grad_norm=108.024, clip=100.000, loss_scale=7.737e+25, optim_step_time=0.183, optim0_lr0=6.649e-05, train_time=2.726 +[gpub005:0/64] 2023-07-09 07:18:09,966 (trainer:732) INFO: 30epoch:train:2801-2900batch: iter_time=1.213e-04, forward_time=0.147, loss_ctc=68.932, loss_att=47.991, acc=0.727, loss=54.273, backward_time=1.030, grad_norm=91.719, clip=100.000, loss_scale=7.737e+25, optim_step_time=0.183, optim0_lr0=6.648e-05, train_time=2.719 +[gpub005:0/64] 2023-07-09 07:20:25,937 (trainer:732) INFO: 30epoch:train:2901-3000batch: iter_time=1.119e-04, forward_time=0.147, loss_ctc=76.108, loss_att=56.964, acc=0.689, loss=62.707, backward_time=1.030, grad_norm=111.677, clip=100.000, loss_scale=7.737e+25, optim_step_time=0.183, optim0_lr0=6.647e-05, train_time=2.719 +[gpub005:0/64] 2023-07-09 07:22:41,672 (trainer:732) INFO: 30epoch:train:3001-3100batch: iter_time=1.222e-04, forward_time=0.145, loss_ctc=68.551, loss_att=47.543, acc=0.711, loss=53.845, backward_time=1.028, grad_norm=94.809, clip=100.000, loss_scale=7.737e+25, 
optim_step_time=0.183, optim0_lr0=6.645e-05, train_time=2.714 +[gpub005:0/64] 2023-07-09 07:24:57,391 (trainer:732) INFO: 30epoch:train:3101-3200batch: iter_time=1.156e-04, forward_time=0.146, loss_ctc=76.505, loss_att=59.918, acc=0.709, loss=64.894, backward_time=1.028, grad_norm=102.831, clip=100.000, loss_scale=7.737e+25, optim_step_time=0.183, optim0_lr0=6.644e-05, train_time=2.714 +[gpub005:0/64] 2023-07-09 07:27:13,353 (trainer:732) INFO: 30epoch:train:3201-3300batch: iter_time=1.116e-04, forward_time=0.146, loss_ctc=74.867, loss_att=58.218, acc=0.713, loss=63.213, backward_time=1.029, grad_norm=165.724, clip=100.000, loss_scale=7.737e+25, optim_step_time=0.183, optim0_lr0=6.643e-05, train_time=2.719 +[gpub005:0/64] 2023-07-09 07:28:00,817 (multiple_iter_factory:32) INFO: Building 4th iter-factory... +[gpub005:0/64] 2023-07-09 07:28:18,947 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub005:0/64] 2023-07-09 07:28:22,591 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"} + preprocess: ) +[gpub005:0/64] 2023-07-09 07:28:22,591 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5, +[gpub005:0/64] 2023-07-09 07:28:22,597 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub005:0/64] 2023-07-09 07:34:22,922 (trainer:732) INFO: 30epoch:train:3301-3400batch: iter_time=1.234, forward_time=0.170, loss_ctc=80.726, loss_att=62.733, acc=0.711, loss=68.131, backward_time=1.040, grad_norm=125.085, clip=100.000, loss_scale=7.737e+25, optim_step_time=0.185, optim0_lr0=6.642e-05, train_time=8.591 +[gpub005:0/64] 2023-07-09 07:36:40,744 (trainer:732) INFO: 30epoch:train:3401-3500batch: iter_time=1.170e-04, forward_time=0.147, loss_ctc=69.925, loss_att=55.881, acc=0.710, loss=60.094, backward_time=1.036, grad_norm=119.662, clip=100.000, loss_scale=7.737e+25, optim_step_time=0.183, optim0_lr0=6.641e-05, train_time=2.756 +[gpub005:0/64] 2023-07-09 07:38:56,449 (trainer:732) INFO: 30epoch:train:3501-3600batch: iter_time=1.146e-04, forward_time=0.146, loss_ctc=69.586, loss_att=53.200, acc=0.709, loss=58.116, backward_time=1.029, grad_norm=123.761, clip=100.000, loss_scale=7.737e+25, optim_step_time=0.183, optim0_lr0=6.640e-05, train_time=2.714 +[gpub005:0/64] 2023-07-09 07:41:13,578 (trainer:732) INFO: 30epoch:train:3601-3700batch: iter_time=1.139e-04, forward_time=0.146, loss_ctc=71.179, loss_att=51.608, acc=0.710, loss=57.480, backward_time=1.027, grad_norm=100.747, clip=100.000, loss_scale=7.737e+25, optim_step_time=0.183, optim0_lr0=6.638e-05, train_time=2.742 +[gpub005:0/64] 2023-07-09 07:43:30,037 (trainer:732) INFO: 30epoch:train:3701-3800batch: iter_time=1.192e-04, forward_time=0.146, loss_ctc=71.628, loss_att=52.400, acc=0.715, loss=58.169, backward_time=1.029, grad_norm=96.842, clip=100.000, loss_scale=7.737e+25, optim_step_time=0.183, optim0_lr0=6.637e-05, train_time=2.729 +[gpub005:0/64] 2023-07-09 07:45:45,881 (trainer:732) INFO: 30epoch:train:3801-3900batch: iter_time=1.200e-04, forward_time=0.147, 
loss_ctc=72.724, loss_att=52.528, acc=0.693, loss=58.587, backward_time=1.028, grad_norm=108.896, clip=100.000, loss_scale=7.737e+25, optim_step_time=0.183, optim0_lr0=6.636e-05, train_time=2.717 +[gpub005:0/64] 2023-07-09 07:48:11,019 (trainer:732) INFO: 30epoch:train:3901-4000batch: iter_time=1.153e-04, forward_time=0.147, loss_ctc=70.380, loss_att=54.615, acc=0.715, loss=59.344, backward_time=1.089, grad_norm=115.084, clip=100.000, loss_scale=7.737e+25, optim_step_time=0.183, optim0_lr0=6.635e-05, train_time=2.903 +[gpub005:0/64] 2023-07-09 07:50:29,931 (trainer:732) INFO: 30epoch:train:4001-4100batch: iter_time=1.231e-04, forward_time=0.147, loss_ctc=79.491, loss_att=57.454, acc=0.715, loss=64.065, backward_time=1.034, grad_norm=115.797, clip=100.000, loss_scale=1.547e+26, optim_step_time=0.183, optim0_lr0=6.634e-05, train_time=2.778 +[gpub005:0/64] 2023-07-09 07:52:17,245 (multiple_iter_factory:32) INFO: Building 5th iter-factory... +[gpub005:0/64] 2023-07-09 07:52:35,320 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub005:0/64] 2023-07-09 07:52:38,875 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"} + preprocess: ) +[gpub005:0/64] 2023-07-09 07:52:38,875 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7, +[gpub005:0/64] 2023-07-09 07:52:38,881 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub005:0/64] 2023-07-09 07:57:13,828 (trainer:732) INFO: 30epoch:train:4101-4200batch: iter_time=1.808, forward_time=0.172, loss_ctc=73.203, loss_att=63.564, acc=0.703, loss=66.455, backward_time=1.059, grad_norm=135.634, clip=100.000, loss_scale=1.547e+26, optim_step_time=0.184, optim0_lr0=6.633e-05, train_time=8.078 +[gpub005:0/64] 2023-07-09 07:59:30,354 (trainer:732) INFO: 30epoch:train:4201-4300batch: iter_time=1.169e-04, forward_time=0.148, loss_ctc=77.444, loss_att=60.785, acc=0.709, loss=65.783, backward_time=1.032, grad_norm=116.536, clip=100.000, loss_scale=1.547e+26, optim_step_time=0.183, optim0_lr0=6.631e-05, train_time=2.730 +[gpub005:0/64] 2023-07-09 08:01:50,592 (trainer:732) INFO: 30epoch:train:4301-4400batch: iter_time=1.215e-04, forward_time=0.146, loss_ctc=64.714, loss_att=52.178, acc=0.713, loss=55.939, backward_time=1.030, grad_norm=93.861, clip=100.000, loss_scale=1.547e+26, optim_step_time=0.183, optim0_lr0=6.630e-05, train_time=2.805 +[gpub005:0/64] 2023-07-09 08:04:18,788 (trainer:732) INFO: 30epoch:train:4401-4500batch: iter_time=1.166e-04, forward_time=0.147, loss_ctc=74.033, loss_att=54.299, acc=0.707, loss=60.219, backward_time=1.056, grad_norm=102.941, clip=100.000, loss_scale=1.547e+26, optim_step_time=0.183, optim0_lr0=6.629e-05, train_time=2.964 +[gpub005:0/64] 2023-07-09 08:06:38,566 (trainer:732) INFO: 30epoch:train:4501-4600batch: iter_time=1.187e-04, forward_time=0.144, loss_ctc=68.971, loss_att=48.825, acc=0.724, loss=54.868, backward_time=1.033, grad_norm=97.902, clip=100.000, loss_scale=1.547e+26, optim_step_time=0.183, optim0_lr0=6.628e-05, train_time=2.795 
+[gpub005:0/64] 2023-07-09 08:09:01,583 (trainer:732) INFO: 30epoch:train:4601-4700batch: iter_time=1.106e-04, forward_time=0.144, loss_ctc=74.694, loss_att=55.882, acc=0.686, loss=61.525, backward_time=1.036, grad_norm=154.070, clip=100.000, loss_scale=1.547e+26, optim_step_time=0.183, optim0_lr0=6.627e-05, train_time=2.860 +[gpub005:0/64] 2023-07-09 08:11:17,207 (trainer:732) INFO: 30epoch:train:4701-4800batch: iter_time=1.118e-04, forward_time=0.145, loss_ctc=69.389, loss_att=49.217, acc=0.709, loss=55.269, backward_time=1.024, grad_norm=102.401, clip=100.000, loss_scale=1.547e+26, optim_step_time=0.183, optim0_lr0=6.626e-05, train_time=2.712 +[gpub005:0/64] 2023-07-09 08:13:34,842 (trainer:732) INFO: 30epoch:train:4801-4900batch: iter_time=1.201e-04, forward_time=0.146, loss_ctc=76.215, loss_att=58.985, acc=0.716, loss=64.154, backward_time=1.029, grad_norm=102.975, clip=100.000, loss_scale=1.547e+26, optim_step_time=0.183, optim0_lr0=6.624e-05, train_time=2.752 +[gpub005:0/64] 2023-07-09 08:15:50,733 (trainer:732) INFO: 30epoch:train:4901-5000batch: iter_time=1.189e-04, forward_time=0.147, loss_ctc=73.406, loss_att=57.659, acc=0.715, loss=62.383, backward_time=1.029, grad_norm=107.677, clip=100.000, loss_scale=1.547e+26, optim_step_time=0.183, optim0_lr0=6.623e-05, train_time=2.718 +[gpub005:0/64] 2023-07-09 08:15:55,286 (multiple_iter_factory:32) INFO: Building 6th iter-factory... +[gpub005:0/64] 2023-07-09 08:16:13,554 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub005:0/64] 2023-07-09 08:16:17,414 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"} + preprocess: ) +[gpub005:0/64] 2023-07-09 08:16:17,414 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3, +[gpub005:0/64] 2023-07-09 08:16:17,420 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub005:0/64] 2023-07-09 08:21:39,635 (trainer:732) INFO: 30epoch:train:5001-5100batch: iter_time=1.348, forward_time=0.148, loss_ctc=79.858, loss_att=65.978, acc=0.708, loss=70.142, backward_time=1.055, grad_norm=110.794, clip=100.000, loss_scale=1.547e+26, optim_step_time=0.183, optim0_lr0=6.622e-05, train_time=6.978 +[gpub005:0/64] 2023-07-09 08:24:02,275 (trainer:732) INFO: 30epoch:train:5101-5200batch: iter_time=1.025e-04, forward_time=0.147, loss_ctc=68.121, loss_att=55.090, acc=0.713, loss=58.999, backward_time=1.036, grad_norm=97.120, clip=100.000, loss_scale=1.547e+26, optim_step_time=0.183, optim0_lr0=6.621e-05, train_time=2.853 +[gpub005:0/64] 2023-07-09 08:26:17,891 (trainer:732) INFO: 30epoch:train:5201-5300batch: iter_time=1.282e-04, forward_time=0.146, loss_ctc=67.439, loss_att=45.809, acc=0.718, loss=52.298, backward_time=1.026, grad_norm=94.714, clip=100.000, loss_scale=1.547e+26, optim_step_time=0.183, optim0_lr0=6.620e-05, train_time=2.712 +[gpub005:0/64] 2023-07-09 08:28:33,694 (trainer:732) INFO: 30epoch:train:5301-5400batch: iter_time=1.259e-04, forward_time=0.146, loss_ctc=65.888, loss_att=50.949, acc=0.715, loss=55.431, 
backward_time=1.027, grad_norm=105.113, clip=100.000, loss_scale=1.547e+26, optim_step_time=0.183, optim0_lr0=6.619e-05, train_time=2.716
+[gpub005:0/64] 2023-07-09 08:30:49,534 (trainer:732) INFO: 30epoch:train:5401-5500batch: iter_time=1.314e-04, forward_time=0.146, loss_ctc=75.721, loss_att=55.905, acc=0.700, loss=61.850, backward_time=1.027, grad_norm=107.472, clip=100.000, loss_scale=1.547e+26, optim_step_time=0.183, optim0_lr0=6.617e-05, train_time=2.717
+[gpub005:0/64] 2023-07-09 08:33:06,008 (trainer:732) INFO: 30epoch:train:5501-5600batch: iter_time=1.142e-04, forward_time=0.147, loss_ctc=72.755, loss_att=50.694, acc=0.711, loss=57.312, backward_time=1.028, grad_norm=108.338, clip=100.000, loss_scale=1.547e+26, optim_step_time=0.183, optim0_lr0=6.616e-05, train_time=2.729
+[gpub005:0/64] 2023-07-09 08:35:21,994 (trainer:732) INFO: 30epoch:train:5601-5700batch: iter_time=9.944e-05, forward_time=0.147, loss_ctc=71.018, loss_att=55.206, acc=0.714, loss=59.950, backward_time=1.029, grad_norm=106.123, clip=100.000, loss_scale=1.547e+26, optim_step_time=0.183, optim0_lr0=6.615e-05, train_time=2.719
+[gpub005:0/64] 2023-07-09 08:37:37,976 (trainer:732) INFO: 30epoch:train:5701-5800batch: iter_time=1.133e-04, forward_time=0.146, loss_ctc=75.038, loss_att=60.040, acc=0.717, loss=64.539, backward_time=1.029, grad_norm=119.425, clip=100.000, loss_scale=1.547e+26, optim_step_time=0.183, optim0_lr0=6.614e-05, train_time=2.719
+[gpub005:0/64] 2023-07-09 08:38:26,516 (multiple_iter_factory:32) INFO: Building 7th iter-factory...
+[gpub005:0/64] 2023-07-09 08:38:44,159 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub005:0/64] 2023-07-09 08:38:47,519 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"}
+  preprocess: )
+[gpub005:0/64] 2023-07-09 08:38:47,520 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11,
+[gpub005:0/64] 2023-07-09 08:38:47,581 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub005:0/64] 2023-07-09 08:44:06,011 (trainer:732) INFO: 30epoch:train:5801-5900batch: iter_time=1.453, forward_time=0.147, loss_ctc=74.094, loss_att=57.930, acc=0.715, loss=62.779, backward_time=1.047, grad_norm=106.855, clip=100.000, loss_scale=1.547e+26, optim_step_time=0.183, optim0_lr0=6.613e-05, train_time=7.760
+[gpub005:0/64] 2023-07-09 08:46:23,423 (trainer:732) INFO: 30epoch:train:5901-6000batch: iter_time=1.069e-04, forward_time=0.146, loss_ctc=66.844, loss_att=52.858, acc=0.715, loss=57.054, backward_time=1.032, grad_norm=103.900, clip=100.000, loss_scale=1.547e+26, optim_step_time=0.183, optim0_lr0=6.612e-05, train_time=2.748
+[gpub005:0/64] 2023-07-09 08:48:41,143 (trainer:732) INFO: 30epoch:train:6001-6100batch: iter_time=1.043e-04, forward_time=0.146, loss_ctc=65.513, loss_att=47.511, acc=0.713, loss=52.911, backward_time=1.033, grad_norm=110.804, clip=100.000, loss_scale=1.547e+26, optim_step_time=0.183, optim0_lr0=6.610e-05, train_time=2.754
+[gpub005:0/64] 2023-07-09 08:50:57,547 (trainer:732) INFO: 30epoch:train:6101-6200batch: iter_time=1.083e-04, forward_time=0.146, loss_ctc=66.188, loss_att=50.999, acc=0.714, loss=55.556, backward_time=1.029, grad_norm=98.669, clip=100.000, loss_scale=1.547e+26, optim_step_time=0.183, optim0_lr0=6.609e-05, train_time=2.728
+[gpub005:0/64] 2023-07-09 08:53:13,408 (trainer:732) INFO: 30epoch:train:6201-6300batch: iter_time=1.085e-04, forward_time=0.146, loss_ctc=75.870, loss_att=56.326, acc=0.701, loss=62.189, backward_time=1.029, grad_norm=108.691, clip=100.000, loss_scale=1.547e+26, optim_step_time=0.184, optim0_lr0=6.608e-05, train_time=2.717
+[gpub005:0/64] 2023-07-09 08:55:30,067 (trainer:732) INFO: 30epoch:train:6301-6400batch: iter_time=1.029e-04, forward_time=0.146, loss_ctc=72.916, loss_att=50.933, acc=0.708, loss=57.528, backward_time=1.028, grad_norm=107.518, clip=100.000, loss_scale=1.547e+26, optim_step_time=0.183, optim0_lr0=6.607e-05, train_time=2.733
+[gpub005:0/64] 2023-07-09 08:57:48,148 (trainer:732) INFO: 30epoch:train:6401-6500batch: iter_time=1.077e-04, forward_time=0.146, loss_ctc=71.883, loss_att=55.390, acc=0.712, loss=60.338, backward_time=1.029, grad_norm=98.625, clip=100.000, loss_scale=1.547e+26, optim_step_time=0.183, optim0_lr0=6.606e-05, train_time=2.761
+[gpub005:0/64] 2023-07-09 09:00:03,990 (trainer:732) INFO: 30epoch:train:6501-6600batch: iter_time=1.080e-04, forward_time=0.146, loss_ctc=74.721, loss_att=57.672, acc=0.714, loss=62.787, backward_time=1.029, grad_norm=99.654, clip=100.000, loss_scale=1.547e+26, optim_step_time=0.183, optim0_lr0=6.605e-05, train_time=2.717
+[gpub005:0/64] 2023-07-09 09:01:37,401 (multiple_iter_factory:32) INFO: Building 8th iter-factory...
+[gpub005:0/64] 2023-07-09 09:01:55,924 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub005:0/64] 2023-07-09 09:01:59,360 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"}
+  preprocess: )
+[gpub005:0/64] 2023-07-09 09:01:59,360 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6,
+[gpub005:0/64] 2023-07-09 09:01:59,366 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub005:0/64] 2023-07-09 09:05:31,034 (trainer:732) INFO: 30epoch:train:6601-6700batch: iter_time=1.189, forward_time=0.188, loss_ctc=75.246, loss_att=59.358, acc=0.716, loss=64.124, backward_time=1.048, grad_norm=107.046, clip=100.000, loss_scale=1.547e+26, optim_step_time=0.185, optim0_lr0=6.604e-05, train_time=6.541
+[gpub005:0/64] 2023-07-09 09:07:47,651 (trainer:732) INFO: 30epoch:train:6701-6800batch: iter_time=1.161e-04, forward_time=0.145, loss_ctc=76.815, loss_att=61.302, acc=0.695, loss=65.956, backward_time=1.029, grad_norm=110.285, clip=100.000, loss_scale=1.547e+26, optim_step_time=0.183, optim0_lr0=6.602e-05, train_time=2.732
+[gpub005:0/64] 2023-07-09 09:10:03,733 (trainer:732) INFO: 30epoch:train:6801-6900batch: iter_time=1.232e-04, forward_time=0.147, loss_ctc=63.395, loss_att=53.738, acc=0.705, loss=56.635, backward_time=1.029, grad_norm=93.495, clip=100.000, loss_scale=1.547e+26, optim_step_time=0.183, optim0_lr0=6.601e-05, train_time=2.721
+[gpub005:0/64] 2023-07-09 09:12:20,215 (trainer:732) INFO: 30epoch:train:6901-7000batch: iter_time=1.293e-04, forward_time=0.146, loss_ctc=73.005, loss_att=53.910, acc=0.703, loss=59.639, backward_time=1.029, grad_norm=116.643, clip=100.000, loss_scale=1.547e+26, optim_step_time=0.183, optim0_lr0=6.600e-05, train_time=2.729
+[gpub005:0/64] 2023-07-09 09:14:35,852 (trainer:732) INFO: 30epoch:train:7001-7100batch: iter_time=1.414e-04, forward_time=0.146, loss_ctc=68.606, loss_att=50.078, acc=0.714, loss=55.637, backward_time=1.028, grad_norm=91.992, clip=100.000, loss_scale=1.547e+26, optim_step_time=0.183, optim0_lr0=6.599e-05, train_time=2.713
+[gpub005:0/64] 2023-07-09 09:16:56,392 (trainer:732) INFO: 30epoch:train:7101-7200batch: iter_time=1.186e-04, forward_time=0.174, loss_ctc=72.330, loss_att=55.852, acc=0.689, loss=60.795, backward_time=1.029, grad_norm=103.971, clip=100.000, loss_scale=1.547e+26, optim_step_time=0.184, optim0_lr0=6.598e-05, train_time=2.811
+[gpub005:0/64] 2023-07-09 09:19:14,156 (trainer:732) INFO: 30epoch:train:7201-7300batch: iter_time=1.131e-04, forward_time=0.161, loss_ctc=66.916, loss_att=49.747, acc=0.706, loss=54.898, backward_time=1.027, grad_norm=91.907, clip=100.000, loss_scale=1.547e+26, optim_step_time=0.183, optim0_lr0=6.597e-05, train_time=2.754
+[gpub005:0/64] 2023-07-09 09:21:33,212 (trainer:732) INFO: 30epoch:train:7301-7400batch: iter_time=1.143e-04, forward_time=0.147, loss_ctc=74.799, loss_att=59.157, acc=0.709, loss=63.849, backward_time=1.031, grad_norm=114.878, clip=100.000, loss_scale=1.547e+26, optim_step_time=0.183, optim0_lr0=6.595e-05, train_time=2.782
+[gpub005:0/64] 2023-07-09 09:23:48,998 (trainer:732) INFO: 30epoch:train:7401-7500batch: iter_time=1.147e-04, forward_time=0.146, loss_ctc=71.607, loss_att=57.464, acc=0.706, loss=61.707, backward_time=1.027, grad_norm=123.667, clip=100.000, loss_scale=1.547e+26, optim_step_time=0.183, optim0_lr0=6.594e-05, train_time=2.715
+[gpub005:0/64] 2023-07-09 09:24:03,715 (multiple_iter_factory:32) INFO: Building 9th iter-factory...
+[gpub005:0/64] 2023-07-09 09:24:22,085 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub005:0/64] 2023-07-09 09:24:25,849 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"}
+  preprocess: )
+[gpub005:0/64] 2023-07-09 09:24:25,849 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4,
+[gpub005:0/64] 2023-07-09 09:24:25,855 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub005:0/64] 2023-07-09 09:30:03,490 (trainer:732) INFO: 30epoch:train:7501-7600batch: iter_time=2.212, forward_time=0.145, loss_ctc=79.143, loss_att=65.331, acc=0.698, loss=69.474, backward_time=1.041, grad_norm=116.935, clip=100.000, loss_scale=1.547e+26, optim_step_time=0.183, optim0_lr0=6.593e-05, train_time=7.490
+[gpub005:0/64] 2023-07-09 09:32:20,507 (trainer:732) INFO: 30epoch:train:7601-7700batch: iter_time=1.198e-04, forward_time=0.147, loss_ctc=68.209, loss_att=54.927, acc=0.710, loss=58.912, backward_time=1.029, grad_norm=98.114, clip=100.000, loss_scale=1.547e+26, optim_step_time=0.183, optim0_lr0=6.592e-05, train_time=2.740
+[gpub005:0/64] 2023-07-09 09:34:37,364 (trainer:732) INFO: 30epoch:train:7701-7800batch: iter_time=1.241e-04, forward_time=0.145, loss_ctc=67.320, loss_att=47.178, acc=0.705, loss=53.221, backward_time=1.028, grad_norm=107.580, clip=100.000, loss_scale=1.547e+26, optim_step_time=0.183, optim0_lr0=6.591e-05, train_time=2.737
+[gpub005:0/64] 2023-07-09 09:36:52,907 (trainer:732) INFO: 30epoch:train:7801-7900batch: iter_time=1.219e-04, forward_time=0.145, loss_ctc=65.382, loss_att=51.048, acc=0.714, loss=55.348, backward_time=1.025, grad_norm=94.448, clip=100.000, loss_scale=1.547e+26, optim_step_time=0.183, optim0_lr0=6.590e-05, train_time=2.711
+[gpub005:0/64] 2023-07-09 09:39:08,490 (trainer:732) INFO: 30epoch:train:7901-8000batch: iter_time=1.221e-04, forward_time=0.145, loss_ctc=74.717, loss_att=57.283, acc=0.693, loss=62.513, backward_time=1.026, grad_norm=109.439, clip=100.000, loss_scale=1.547e+26, optim_step_time=0.183, optim0_lr0=6.589e-05, train_time=2.711
+[gpub005:0/64] 2023-07-09 09:41:24,030 (trainer:732) INFO: 30epoch:train:8001-8100batch: iter_time=1.290e-04, forward_time=0.145, loss_ctc=72.670, loss_att=51.262, acc=0.705, loss=57.684, backward_time=1.025, grad_norm=114.707, clip=100.000, loss_scale=3.095e+26, optim_step_time=0.182, optim0_lr0=6.587e-05, train_time=2.711
+[gpub005:0/64] 2023-07-09 09:43:40,044 (trainer:732) INFO: 30epoch:train:8101-8200batch: iter_time=1.192e-04, forward_time=0.146, loss_ctc=71.182, loss_att=53.993, acc=0.710, loss=59.150, backward_time=1.030, grad_norm=103.073, clip=100.000, loss_scale=3.095e+26, optim_step_time=0.183, optim0_lr0=6.586e-05, train_time=2.720
+[gpub005:0/64] 2023-07-09 09:45:55,894 (trainer:732) INFO: 30epoch:train:8201-8300batch: iter_time=1.143e-04, forward_time=0.146, loss_ctc=75.740, loss_att=60.546, acc=0.707, loss=65.104, backward_time=1.028, grad_norm=105.365, clip=100.000, loss_scale=3.095e+26, optim_step_time=0.183, optim0_lr0=6.585e-05, train_time=2.717
+[gpub005:0/64] 2023-07-09 09:46:42,025 (multiple_iter_factory:32) INFO: Building 10th iter-factory...
+[gpub005:0/64] 2023-07-09 09:47:00,609 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub005:0/64] 2023-07-09 09:47:04,055 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"}
+  preprocess: )
+[gpub005:0/64] 2023-07-09 09:47:04,055 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2,
+[gpub005:0/64] 2023-07-09 09:47:04,061 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub005:0/64] 2023-07-09 09:52:57,152 (trainer:732) INFO: 30epoch:train:8301-8400batch: iter_time=1.228, forward_time=0.146, loss_ctc=77.226, loss_att=61.263, acc=0.697, loss=66.052, backward_time=1.037, grad_norm=105.579, clip=100.000, loss_scale=3.095e+26, optim_step_time=0.183, optim0_lr0=6.584e-05, train_time=8.425
+[gpub005:0/64] 2023-07-09 09:55:13,828 (trainer:732) INFO: 30epoch:train:8401-8500batch: iter_time=1.160e-04, forward_time=0.145, loss_ctc=69.146, loss_att=55.329, acc=0.709, loss=59.474, backward_time=1.029, grad_norm=107.471, clip=100.000, loss_scale=3.095e+26, optim_step_time=0.182, optim0_lr0=6.583e-05, train_time=2.733
+[gpub005:0/64] 2023-07-09 09:57:29,536 (trainer:732) INFO: 30epoch:train:8501-8600batch: iter_time=1.224e-04, forward_time=0.144, loss_ctc=66.513, loss_att=53.263, acc=0.703, loss=57.238, backward_time=1.025, grad_norm=96.505, clip=100.000, loss_scale=3.095e+26, optim_step_time=0.183, optim0_lr0=6.582e-05, train_time=2.714
+[gpub005:0/64] 2023-07-09 09:59:45,955 (trainer:732) INFO: 30epoch:train:8601-8700batch: iter_time=1.298e-04, forward_time=0.146, loss_ctc=70.589, loss_att=51.866, acc=0.707, loss=57.483, backward_time=1.029, grad_norm=107.538, clip=100.000, loss_scale=3.095e+26, optim_step_time=0.183, optim0_lr0=6.581e-05, train_time=2.728
+[gpub005:0/64] 2023-07-09 10:02:01,664 (trainer:732) INFO: 30epoch:train:8701-8800batch: iter_time=1.318e-04, forward_time=0.146, loss_ctc=70.315, loss_att=53.128, acc=0.709, loss=58.284, backward_time=1.029, grad_norm=101.284, clip=100.000, loss_scale=3.095e+26, optim_step_time=0.183, optim0_lr0=6.579e-05, train_time=2.714
+[gpub005:0/64] 2023-07-09 10:04:17,103 (trainer:732) INFO: 30epoch:train:8801-8900batch: iter_time=1.157e-04, forward_time=0.145, loss_ctc=75.096, loss_att=52.904, acc=0.696, loss=59.561, backward_time=1.027, grad_norm=104.484, clip=100.000, loss_scale=3.095e+26, optim_step_time=0.183, optim0_lr0=6.578e-05, train_time=2.709
+[gpub005:0/64] 2023-07-09 10:06:32,753 (trainer:732) INFO: 30epoch:train:8901-9000batch: iter_time=1.248e-04, forward_time=0.146, loss_ctc=67.620, loss_att=53.536, acc=0.711, loss=57.762, backward_time=1.028, grad_norm=97.983, clip=100.000, loss_scale=3.095e+26, optim_step_time=0.183, optim0_lr0=6.577e-05, train_time=2.713
+[gpub005:0/64] 2023-07-09 10:08:48,415 (trainer:732) INFO: 30epoch:train:9001-9100batch: iter_time=1.166e-04, forward_time=0.146, loss_ctc=79.714, loss_att=58.394, acc=0.703, loss=64.790, backward_time=1.029, grad_norm=104.995, clip=100.000, loss_scale=3.095e+26, optim_step_time=0.183, optim0_lr0=6.576e-05, train_time=2.713
+[gpub005:0/64] 2023-07-09 10:10:19,724 (multiple_iter_factory:32) INFO: Building 11th iter-factory...
+[gpub005:0/64] 2023-07-09 10:10:37,937 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub005:0/64] 2023-07-09 10:10:41,460 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"}
+  preprocess: )
+[gpub005:0/64] 2023-07-09 10:10:41,461 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10,
+[gpub005:0/64] 2023-07-09 10:10:41,467 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub005:0/64] 2023-07-09 10:13:44,726 (trainer:732) INFO: 30epoch:train:9101-9200batch: iter_time=1.276, forward_time=0.172, loss_ctc=72.067, loss_att=56.062, acc=0.707, loss=60.863, backward_time=1.041, grad_norm=115.598, clip=100.000, loss_scale=3.095e+26, optim_step_time=0.184, optim0_lr0=6.575e-05, train_time=5.926
+[gpub005:0/64] 2023-07-09 10:16:01,376 (trainer:732) INFO: 30epoch:train:9201-9300batch: iter_time=1.195e-04, forward_time=0.145, loss_ctc=70.122, loss_att=56.197, acc=0.700, loss=60.374, backward_time=1.030, grad_norm=113.875, clip=100.000, loss_scale=3.095e+26, optim_step_time=0.183, optim0_lr0=6.574e-05, train_time=2.733
+[gpub005:0/64] 2023-07-09 10:18:17,221 (trainer:732) INFO: 30epoch:train:9301-9400batch: iter_time=1.348e-04, forward_time=0.145, loss_ctc=66.656, loss_att=53.634, acc=0.703, loss=57.541, backward_time=1.027, grad_norm=107.518, clip=100.000, loss_scale=3.095e+26, optim_step_time=0.183, optim0_lr0=6.573e-05, train_time=2.717
+[gpub005:0/64] 2023-07-09 10:20:33,434 (trainer:732) INFO: 30epoch:train:9401-9500batch: iter_time=1.426e-04, forward_time=0.146, loss_ctc=70.982, loss_att=51.945, acc=0.706, loss=57.656, backward_time=1.028, grad_norm=103.989, clip=100.000, loss_scale=3.095e+26, optim_step_time=0.183, optim0_lr0=6.572e-05, train_time=2.724
+[gpub005:0/64] 2023-07-09 10:22:49,713 (trainer:732) INFO: 30epoch:train:9501-9600batch: iter_time=1.236e-04, forward_time=0.146, loss_ctc=71.163, loss_att=53.531, acc=0.707, loss=58.820, backward_time=1.028, grad_norm=106.954, clip=100.000, loss_scale=3.095e+26, optim_step_time=0.183, optim0_lr0=6.570e-05, train_time=2.725
+[gpub005:0/64] 2023-07-09 10:25:05,526 (trainer:732) INFO: 30epoch:train:9601-9700batch: iter_time=1.256e-04, forward_time=0.145, loss_ctc=72.003, loss_att=50.687, acc=0.699, loss=57.082, backward_time=1.026, grad_norm=95.981, clip=100.000, loss_scale=3.095e+26, optim_step_time=0.183, optim0_lr0=6.569e-05, train_time=2.716
+[gpub005:0/64] 2023-07-09 10:27:21,391 (trainer:732) INFO: 30epoch:train:9701-9800batch: iter_time=1.100e-04, forward_time=0.145, loss_ctc=67.314, loss_att=53.367, acc=0.709, loss=57.551, backward_time=1.028, grad_norm=101.436, clip=100.000, loss_scale=3.095e+26, optim_step_time=0.183, optim0_lr0=6.568e-05, train_time=2.717
+[gpub005:0/64] 2023-07-09 10:29:37,738 (trainer:732) INFO: 30epoch:train:9801-9900batch: iter_time=1.103e-04, forward_time=0.144, loss_ctc=79.876, loss_att=57.750, acc=0.705, loss=64.387, backward_time=1.026, grad_norm=108.714, clip=100.000, loss_scale=3.095e+26, optim_step_time=0.183, optim0_lr0=6.567e-05, train_time=2.727
+[gpub005:0/64] 2023-07-09 10:31:53,636 (trainer:732) INFO: 30epoch:train:9901-10000batch: iter_time=1.198e-04, forward_time=0.146, loss_ctc=69.642, loss_att=52.891, acc=0.711, loss=57.916, backward_time=1.026, grad_norm=127.204, clip=100.000, loss_scale=3.095e+26, optim_step_time=0.183, optim0_lr0=6.566e-05, train_time=2.718
+gpub030:2531971:2532059 [2] NCCL INFO [Service thread] Connection closed by localRank 2
+gpub018:1650755:1650841 [2] NCCL INFO [Service thread] Connection closed by localRank 2
+gpub012:1607819:1607909 [1] NCCL INFO [Service thread] Connection closed by localRank 1
+gpub014:1495254:1495339 [0] NCCL INFO [Service thread] Connection closed by localRank 0
+gpub014:1495256:1495336 [2] NCCL INFO [Service thread] Connection closed by localRank 2
+gpub030:2531969:2532058 [0] NCCL INFO [Service thread] Connection closed by localRank 0
+gpub030:2531970:2532060 [1] NCCL INFO [Service thread] Connection closed by localRank 1
+gpub096:1645785:1645863 [1] NCCL INFO [Service thread] Connection closed by localRank 1
+gpub097:1705871:1705965 [3] NCCL INFO [Service thread] Connection closed by localRank 3
+gpub013:1694053:1694137 [0] NCCL INFO [Service thread] Connection closed by localRank 0
+gpub013:1694056:1694139 [3] NCCL INFO [Service thread] Connection closed by localRank 3
+gpub013:1694055:1694136 [2] NCCL INFO [Service thread] Connection closed by localRank 2
+gpub018:1650753:1650840 [0] NCCL INFO [Service thread] Connection closed by localRank 0
+gpub098:1875740:1875815 [2] NCCL INFO [Service thread] Connection closed by localRank 2
+gpub098:1875738:1875816 [0] NCCL INFO [Service thread] Connection closed by localRank 0
+gpub098:1875741:1875817 [3] NCCL INFO [Service thread] Connection closed by localRank 3
+gpub012:1607821:1607907 [3] NCCL INFO [Service thread] Connection closed by localRank 3
+gpub012:1607818:1607910 [0] NCCL INFO [Service thread] Connection closed by localRank 0
+gpub014:1495255:1495338 [1] NCCL INFO [Service thread] Connection closed by localRank 1
+gpub014:1495257:1495337 [3] NCCL INFO [Service thread] Connection closed by localRank 3
+gpub097:1705870:1705966 [2] NCCL INFO [Service thread] Connection closed by localRank 2
+gpub030:2531972:2532057 [3] NCCL INFO [Service thread] Connection closed by localRank 3
+gpub018:1650756:1650842 [3] NCCL INFO [Service thread] Connection closed by localRank 3
+gpub096:1645786:1645864 [2] NCCL INFO [Service thread] Connection closed by localRank 2
+gpub096:1645787:1645865 [3] NCCL INFO [Service thread] Connection closed by localRank 3
+gpub096:1645784:1645866 [0] NCCL INFO [Service thread] Connection closed by localRank 0
+gpub012:1607820:1607908 [2] NCCL INFO [Service thread] Connection closed by localRank 2
+gpub018:1650755:1650755 [2] NCCL INFO comm 0x513374c0 rank 18 nranks 64 cudaDev 2 busId 85000 - Abort COMPLETE
+gpub039:2093177:2093253 [2] NCCL INFO [Service thread] Connection closed by localRank 2
+gpub039:2093176:2093252 [1] NCCL INFO [Service thread] Connection closed by localRank 1
+gpub018:1650754:1650839 [1] NCCL INFO [Service thread] Connection closed by localRank 1
+gpub095:2520060:2520144 [1] NCCL INFO [Service thread] Connection closed by localRank 1
+gpub039:2093175:2093254 [0] NCCL INFO [Service thread] Connection closed by localRank 0
+gpub097:1705871:1705871 [3] NCCL INFO comm 0x94d2db0 rank 59 nranks 64 cudaDev 3 busId c7000 - Abort COMPLETE
+gpub039:2093178:2093251 [3] NCCL INFO [Service thread] Connection closed by localRank 3
+gpub040:2093693:2093783 [3] NCCL INFO [Service thread] Connection closed by localRank 3
+gpub040:2093691:2093781 [1] NCCL INFO [Service thread] Connection closed by localRank 1
+gpub040:2093692:2093780 [2] NCCL INFO [Service thread] Connection closed by localRank 2
+gpub095:2520059:2520143 [0] NCCL INFO [Service thread] Connection closed by localRank 0
+gpub040:2093690:2093782 [0] NCCL INFO [Service thread] Connection closed by localRank 0
+gpub072:1805521:1805611 [2] NCCL INFO [Service thread] Connection closed by localRank 2
+gpub095:2520061:2520145 [2] NCCL INFO [Service thread] Connection closed by localRank 2
+gpub013:1694056:1694056 [3] NCCL INFO comm 0x8c00090 rank 11 nranks 64 cudaDev 3 busId c7000 - Abort COMPLETE
+gpub098:1875739:1875818 [1] NCCL INFO [Service thread] Connection closed by localRank 1
+gpub095:2520062:2520146 [3] NCCL INFO [Service thread] Connection closed by localRank 3
+gpub072:1805522:1805612 [3] NCCL INFO [Service thread] Connection closed by localRank 3
+gpub097:1705868:1705963 [0] NCCL INFO [Service thread] Connection closed by localRank 0
+gpub097:1705869:1705964 [1] NCCL INFO [Service thread] Connection closed by localRank 1
+gpub072:1805519:1805613 [0] NCCL INFO [Service thread] Connection closed by localRank 0
+gpub005:2408154:2408234 [3] NCCL INFO [Service thread] Connection closed by localRank 3
+gpub072:1805520:1805610 [1] NCCL INFO [Service thread] Connection closed by localRank 1
+gpub098:1875739:1875739 [1] NCCL INFO comm 0x4ffeee90 rank 61 nranks 64 cudaDev 1 busId 46000 - Abort COMPLETE
+gpub098:1875740:1875740 [2] NCCL INFO comm 0x8c9fbb0 rank 62 nranks 64 cudaDev 2 busId 85000 - Abort COMPLETE
+gpub030:2531969:2531969 [0] NCCL INFO comm 0xb4f6de0 rank 20 nranks 64 cudaDev 0 busId 7000 - Abort COMPLETE
+gpub013:1694055:1694055 [2] NCCL INFO comm 0xf6b9b10 rank 10 nranks 64 cudaDev 2 busId 85000 - Abort COMPLETE
+gpub013:1694053:1694053 [0] NCCL INFO comm 0x8c6ae750 rank 8 nranks 64 cudaDev 0 busId 7000 - Abort COMPLETE
+gpub096:1645785:1645785 [1] NCCL INFO comm 0x50f7e840 rank 53 nranks 64 cudaDev 1 busId 46000 - Abort COMPLETE
+gpub097:1705870:1705870 [2] NCCL INFO comm 0x50f117a0 rank 58 nranks 64 cudaDev 2 busId 85000 - Abort COMPLETE
+gpub012:1607820:1607820 [2] NCCL INFO comm 0x503f1430 rank 6 nranks 64 cudaDev 2 busId 85000 - Abort COMPLETE
+gpub098:1875741:1875741 [3] NCCL INFO comm 0x4ecd4ee0 rank 63 nranks 64 cudaDev 3 busId c7000 - Abort COMPLETE
+gpub072:1805521:1805521 [2] NCCL INFO comm 0x8d829e60 rank 42 nranks 64 cudaDev 2 busId 85000 - Abort COMPLETE
+gpub096:1645786:1645786 [2] NCCL INFO comm 0x4fe9cb90 rank 54 nranks 64 cudaDev 2 busId 85000 - Abort COMPLETE
+gpub039:2093177:2093177 [2] NCCL INFO comm 0xa965b10 rank 26 nranks 64 cudaDev 2 busId 85000 - Abort COMPLETE
+gpub013:1694054:1694138 [1] NCCL INFO [Service thread] Connection closed by localRank 1
+gpub072:1805520:1805520 [1] NCCL INFO comm 0xb6f41780 rank 41 nranks 64 cudaDev 1 busId 46000 - Abort COMPLETE
+gpub098:1875738:1875738 [0] NCCL INFO comm 0x9e5ca730 rank 60 nranks 64 cudaDev 0 busId 7000 - Abort COMPLETE
+gpub030:2531972:2531972 [3] NCCL INFO comm 0xa2cf1d0 rank 23 nranks 64 cudaDev 3 busId c7000 - Abort COMPLETE
+gpub018:1650753:1650753 [0] NCCL INFO comm 0x4f7a1b90 rank 16 nranks 64 cudaDev 0 busId 7000 - Abort COMPLETE
+gpub097:1705869:1705869 [1] NCCL INFO comm 0x8e89510 rank 57 nranks 64 cudaDev 1 busId 46000 - Abort COMPLETE
+gpub018:1650756:1650756 [3] NCCL INFO comm 0x8c504da0 rank 19 nranks 64 cudaDev 3 busId c7000 - Abort COMPLETE
+gpub012:1607818:1607818 [0] NCCL INFO comm 0xa8f3bc80 rank 4 nranks 64 cudaDev 0 busId 7000 - Abort COMPLETE
+gpub072:1805519:1805519 [0] NCCL INFO comm 0x4fb13ad0 rank 40 nranks 64 cudaDev 0 busId 7000 - Abort COMPLETE
+gpub005:2408152:2408235 [1] NCCL INFO [Service thread] Connection closed by localRank 1
+gpub005:2408153:2408237 [2] NCCL INFO [Service thread] Connection closed by localRank 2
+gpub030:2531970:2531970 [1] NCCL INFO comm 0x8ebc3340 rank 21 nranks 64 cudaDev 1 busId 46000 - Abort COMPLETE
+gpub030:2531971:2531971 [2] NCCL INFO comm 0x8dd18cd0 rank 22 nranks 64 cudaDev 2 busId 85000 - Abort COMPLETE
+gpub095:2520059:2520059 [0] NCCL INFO comm 0x15bb1c50 rank 48 nranks 64 cudaDev 0 busId 7000 - Abort COMPLETE
+gpub039:2093175:2093175 [0] NCCL INFO comm 0xa2cab60 rank 24 nranks 64 cudaDev 0 busId 7000 - Abort COMPLETE
+gpub012:1607819:1607819 [1] NCCL INFO comm 0xa4ee840 rank 5 nranks 64 cudaDev 1 busId 46000 - Abort COMPLETE
+gpub072:1805522:1805522 [3] NCCL INFO comm 0x50740450 rank 43 nranks 64 cudaDev 3 busId c7000 - Abort COMPLETE
+gpub005:2408154:2408154 [3] NCCL INFO comm 0x519a6580 rank 3 nranks 64 cudaDev 3 busId c7000 - Abort COMPLETE
+gpub018:1650754:1650754 [1] NCCL INFO comm 0xa938d420 rank 17 nranks 64 cudaDev 1 busId 46000 - Abort COMPLETE
+gpub039:2093176:2093176 [1] NCCL INFO comm 0xbcbaabd0 rank 25 nranks 64 cudaDev 1 busId 46000 - Abort COMPLETE
+gpub095:2520061:2520061 [2] NCCL INFO comm 0x91b7930 rank 50 nranks 64 cudaDev 2 busId 85000 - Abort COMPLETE
+gpub096:1645787:1645787 [3] NCCL INFO comm 0xb78b6390 rank 55 nranks 64 cudaDev 3 busId c7000 - Abort COMPLETE
+gpub012:1607821:1607821 [3] NCCL INFO comm 0x516c3430 rank 7 nranks 64 cudaDev 3 busId c7000 - Abort COMPLETE
+gpub039:2093178:2093178 [3] NCCL INFO comm 0x4fc75960 rank 27 nranks 64 cudaDev 3 busId c7000 - Abort COMPLETE
+gpub005:2408153:2408153 [2] NCCL INFO comm 0x4fab6870 rank 2 nranks 64 cudaDev 2 busId 85000 - Abort COMPLETE
+gpub084:4052710:4052803 [2] NCCL INFO [Service thread] Connection closed by localRank 2
+gpub040:2093690:2093690 [0] NCCL INFO comm 0xba9dc4d0 rank 28 nranks 64 cudaDev 0 busId 7000 - Abort COMPLETE
+gpub014:1495254:1495254 [0] NCCL INFO comm 0x50fe0a80 rank 12 nranks 64 cudaDev 0 busId 7000 - Abort COMPLETE
+gpub084:4052709:4052802 [1] NCCL INFO [Service thread] Connection closed by localRank 1
+gpub084:4052708:4052804 [0] NCCL INFO [Service thread] Connection closed by localRank 0
+gpub084:4052711:4052801 [3] NCCL INFO [Service thread] Connection closed by localRank 3
+gpub096:1645784:1645784 [0] NCCL INFO comm 0xcdcc14f0 rank 52 nranks 64 cudaDev 0 busId 7000 - Abort COMPLETE
+gpub014:1495256:1495256 [2] NCCL INFO comm 0x9f383a90 rank 14 nranks 64 cudaDev 2 busId 85000 - Abort COMPLETE
+gpub005:2408152:2408152 [1] NCCL INFO comm 0x50e7e140 rank 1 nranks 64 cudaDev 1 busId 46000 - Abort COMPLETE
+gpub040:2093692:2093692 [2] NCCL INFO comm 0x514bd130 rank 30 nranks 64 cudaDev 2 busId 85000 - Abort COMPLETE
+gpub013:1694054:1694054 [1] NCCL INFO comm 0x5088d590 rank 9 nranks 64 cudaDev 1 busId 46000 - Abort COMPLETE
+gpub095:2520062:2520062 [3] NCCL INFO comm 0x8c7104c0 rank 51 nranks 64 cudaDev 3 busId c7000 - Abort COMPLETE
+gpub095:2520060:2520060 [1] NCCL INFO comm 0xb4653490 rank 49 nranks 64 cudaDev 1 busId 46000 - Abort COMPLETE
+gpub040:2093691:2093691 [1] NCCL INFO comm 0xb9336880 rank 29 nranks 64 cudaDev 1 busId 46000 - Abort COMPLETE
+gpub014:1495257:1495257 [3] NCCL INFO comm 0x946a450 rank 15 nranks 64 cudaDev 3 busId c7000 - Abort COMPLETE
+gpub097:1705868:1705868 [0] NCCL INFO comm 0x4f565ad0 rank 56 nranks 64 cudaDev 0 busId 7000 - Abort COMPLETE
+gpub040:2093693:2093693 [3] NCCL INFO comm 0xbd6eac10 rank 31 nranks 64 cudaDev 3 busId c7000 - Abort COMPLETE
+gpub014:1495255:1495255 [1] NCCL INFO comm 0x515d3c50 rank 13 nranks 64 cudaDev 1 busId 46000 - Abort COMPLETE
+gpub084:4052708:4052708 [0] NCCL INFO comm 0xb576c9d0 rank 44 nranks 64 cudaDev 0 busId 7000 - Abort COMPLETE
+gpub084:4052710:4052710 [2] NCCL INFO comm 0x4f81fce0 rank 46 nranks 64 cudaDev 2 busId 85000 - Abort COMPLETE
+gpub084:4052709:4052709 [1] NCCL INFO comm 0xd834420 rank 45 nranks 64 cudaDev 1 busId 46000 - Abort COMPLETE
+gpub084:4052711:4052711 [3] NCCL INFO comm 0xa5710b50 rank 47 nranks 64 cudaDev 3 busId c7000 - Abort COMPLETE
+gpub041:1527385:1527468 [2] NCCL INFO [Service thread] Connection closed by localRank 2
+gpub041:1527384:1527467 [1] NCCL INFO [Service thread] Connection closed by localRank 1
+gpub041:1527385:1527385 [2] NCCL INFO comm 0x5082e9e0 rank 34 nranks 64 cudaDev 2 busId 85000 - Abort COMPLETE
+gpub041:1527383:1527470 [0] NCCL INFO [Service thread] Connection closed by localRank 0
+gpub041:1527386:1527469 [3] NCCL INFO [Service thread] Connection closed by localRank 3
+gpub041:1527386:1527386 [3] NCCL INFO comm 0x4f979490 rank 35 nranks 64 cudaDev 3 busId c7000 - Abort COMPLETE
+gpub041:1527384:1527384 [1] NCCL INFO comm 0x512a04d0 rank 33 nranks 64 cudaDev 1 busId 46000 - Abort COMPLETE
+gpub041:1527383:1527383 [0] NCCL INFO comm 0x5103b480 rank 32 nranks 64 cudaDev 0 busId 7000 - Abort COMPLETE
+gpub067:1574057:1574141 [3] NCCL INFO [Service thread] Connection closed by localRank 3
+gpub067:1574055:1574142 [1] NCCL INFO [Service thread] Connection closed by localRank 1
+gpub067:1574056:1574140 [2] NCCL INFO [Service thread] Connection closed by localRank 2
+gpub067:1574057:1574057 [3] NCCL INFO comm 0x8d973650 rank 39 nranks 64 cudaDev 3 busId c7000 - Abort COMPLETE
+gpub067:1574055:1574055 [1] NCCL INFO comm 0x509b90f0 rank 37 nranks 64 cudaDev 1 busId 46000 - Abort COMPLETE
+gpub067:1574054:1574139 [0] NCCL INFO [Service thread] Connection closed by localRank 0
+gpub067:1574056:1574056 [2] NCCL INFO comm 0xb006d7d0 rank 38 nranks 64 cudaDev 2 busId 85000 - Abort COMPLETE
+gpub067:1574054:1574054 [0] NCCL INFO comm 0x4f342150 rank 36 nranks 64 cudaDev 0 busId 7000 - Abort COMPLETE
+[gpub005:0/64] 2023-07-09 10:43:52,593 (trainer:338) INFO: 30epoch results: [train] iter_time=0.191, forward_time=0.148, loss_ctc=72.388, loss_att=55.270, acc=0.706, loss=60.405, backward_time=1.032, grad_norm=108.407, clip=100.000, loss_scale=1.547e+26, optim_step_time=0.183, optim0_lr0=6.623e-05, train_time=3.318, time=4 hours, 36 minutes and 42.57 seconds, total_count=270000, gpu_max_cached_mem_GB=38.234, [valid] loss_ctc=46.634, cer_ctc=0.267, loss_att=40.617, acc=0.667, cer=0.399, wer=0.998, loss=42.422, time=5 minutes and 56.2 seconds, total_count=27830, gpu_max_cached_mem_GB=38.234, [att_plot] time=5 minutes and 53.71 seconds, total_count=0, gpu_max_cached_mem_GB=38.234
+[gpub005:0/64] 2023-07-09 10:44:08,080 (trainer:386) INFO: The best model has been updated: valid.total_count
+[gpub005:0/64] 2023-07-09 10:44:08,120 (average_nbest_models:69) INFO: Averaging 5best models: criterion="valid.acc": exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/valid.acc.ave_5best.till30epoch.pth
+[gpub005:0/64] 2023-07-09 10:45:00,456 (average_nbest_models:69) INFO: Averaging 5best models: criterion="valid.total_count": exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/valid.total_count.ave_5best.till30epoch.pth
+[gpub005:0/64] 2023-07-09 10:45:39,702 (trainer:440) INFO: The model files were removed: exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/22epoch.pth, exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/25epoch.pth
+[gpub005:0/64] 2023-07-09 10:45:39,703 (trainer:458) INFO: The training was finished at 30 epochs
+[gpub005:0/64] 2023-07-09 10:45:39,705 (average_nbest_models:69) INFO: Averaging 5best models: criterion="valid.acc": exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/valid.acc.ave_5best.pth
+[gpub005:0/64] 2023-07-09 10:45:50,816 (average_nbest_models:69) INFO: Averaging 5best models: criterion="valid.total_count": exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/valid.total_count.ave_5best.pth
+gpub005:2408151:2408236 [0] NCCL INFO [Service thread] Connection closed by localRank 0
+gpub005:2408151:2408151 [0] NCCL INFO comm 0x8dda0850 rank 0 nranks 64 cudaDev 0 busId 7000 - Abort COMPLETE
+# Accounting: begin_time=1688778318
+# Accounting: end_time=1688917563
+# Accounting: time=139245 threads=1
+# Finished at Sun Jul 9 10:46:03 CDT 2023 with status 0