# python3 -m espnet2.bin.asr_train --use_preprocessor true --bpemodel data/en_token_list/bpe_unigram600/bpe.model --token_type bpe --token_list data/en_token_list/bpe_unigram600/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev_English/wav.scp,speech,sound --valid_data_path_and_name_and_type dump/raw/dev_English/text,text,text --valid_shape_file exp/asr_stats_raw_en_bpe600/valid/speech_shape --valid_shape_file exp/asr_stats_raw_en_bpe600/valid/text_shape.bpe --resume true --fold_length 80000 --fold_length 150 --output_dir exp/asr_IndEng188h-rnnt-600bpe --config conf/tuning/train_conformer-rnn_transducer.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/asr_stats_raw_en_bpe600/train/feats_stats.npz --train_data_path_and_name_and_type dump/raw/train_English/wav.scp,speech,sound --train_data_path_and_name_and_type dump/raw/train_English/text,text,text --train_shape_file exp/asr_stats_raw_en_bpe600/train/speech_shape --train_shape_file exp/asr_stats_raw_en_bpe600/train/text_shape.bpe --ngpu 1 --multiprocessing_distributed True # Started at Fri Jun 3 22:33:08 IST 2022 # /speech/umeshs/espnet-v.202205/tools/anaconda/envs/espnet/bin/python3 /speech/umeshs/espnet-v.202205/espnet2/bin/asr_train.py --use_preprocessor true --bpemodel data/en_token_list/bpe_unigram600/bpe.model --token_type bpe --token_list data/en_token_list/bpe_unigram600/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev_English/wav.scp,speech,sound --valid_data_path_and_name_and_type dump/raw/dev_English/text,text,text --valid_shape_file exp/asr_stats_raw_en_bpe600/valid/speech_shape --valid_shape_file exp/asr_stats_raw_en_bpe600/valid/text_shape.bpe --resume true --fold_length 80000 --fold_length 150 --output_dir exp/asr_IndEng188h-rnnt-600bpe --config conf/tuning/train_conformer-rnn_transducer.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/asr_stats_raw_en_bpe600/train/feats_stats.npz --train_data_path_and_name_and_type dump/raw/train_English/wav.scp,speech,sound --train_data_path_and_name_and_type dump/raw/train_English/text,text,text --train_shape_file exp/asr_stats_raw_en_bpe600/train/speech_shape --train_shape_file exp/asr_stats_raw_en_bpe600/train/text_shape.bpe --ngpu 1 --multiprocessing_distributed True [gpu11] 2022-06-03 22:33:10,345 (asr:399) INFO: Vocabulary size: 600 [gpu11] 2022-06-03 22:33:13,537 (abs_task:1149) INFO: pytorch.version=1.9.1, cuda.available=True, cudnn.version=8005, cudnn.benchmark=False, cudnn.deterministic=True [gpu11] 2022-06-03 22:33:13,544 (abs_task:1150) INFO: Model structure: ESPnetASRModel( (frontend): DefaultFrontend( (stft): Stft(n_fft=512, win_length=512, hop_length=160, center=True, normalized=False, onesided=True) (frontend): Frontend() (logmel): LogMel(sr=16000, n_fft=512, n_mels=80, fmin=0, fmax=8000.0, htk=False) ) (specaug): SpecAug( (time_warp): TimeWarp(window=5, mode=bicubic) (freq_mask): MaskAlongAxis(mask_width_range=[0, 30], num_mask=2, axis=freq) (time_mask): MaskAlongAxis(mask_width_range=[0, 40], num_mask=2, axis=time) ) (normalize): GlobalMVN(stats_file=exp/asr_stats_raw_en_bpe600/train/feats_stats.npz, norm_means=True, norm_vars=True) (encoder): ConformerEncoder( (embed): Conv2dSubsampling( (conv): Sequential( (0): Conv2d(1, 512, kernel_size=(3, 3), stride=(2, 2)) (1): ReLU() (2): Conv2d(512, 512, kernel_size=(3, 3), stride=(2, 2)) (3): ReLU() ) (out): Sequential( (0): Linear(in_features=9728, out_features=512, bias=True) (1): RelPositionalEncoding( (dropout): Dropout(p=0.1, inplace=False) ) ) ) (encoders): MultiSequential( (0): EncoderLayer( (self_attn): RelPositionMultiHeadedAttention( (linear_q): Linear(in_features=512, out_features=512, bias=True) (linear_k): Linear(in_features=512, out_features=512, bias=True) (linear_v): Linear(in_features=512, out_features=512, bias=True) (linear_out): Linear(in_features=512, out_features=512, bias=True) (dropout): Dropout(p=0.1, inplace=False) (linear_pos): Linear(in_features=512, out_features=512, bias=False) ) (feed_forward): PositionwiseFeedForward( (w_1): Linear(in_features=512, out_features=2048, bias=True) (w_2): Linear(in_features=2048, out_features=512, bias=True) (dropout): Dropout(p=0.1, inplace=False) (activation): Swish() ) (feed_forward_macaron): PositionwiseFeedForward( (w_1): Linear(in_features=512, out_features=2048, bias=True) (w_2): Linear(in_features=2048, out_features=512, bias=True) (dropout): Dropout(p=0.1, inplace=False) (activation): Swish() ) (conv_module): ConvolutionModule( (pointwise_conv1): Conv1d(512, 1024, kernel_size=(1,), stride=(1,)) (depthwise_conv): Conv1d(512, 512, kernel_size=(31,), stride=(1,), padding=(15,), groups=512) (norm): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (pointwise_conv2): Conv1d(512, 512, kernel_size=(1,), stride=(1,)) (activation): Swish() ) (norm_ff): LayerNorm((512,), eps=1e-12, elementwise_affine=True) (norm_mha): LayerNorm((512,), eps=1e-12, elementwise_affine=True) (norm_ff_macaron): LayerNorm((512,), eps=1e-12, elementwise_affine=True) (norm_conv): LayerNorm((512,), eps=1e-12, elementwise_affine=True) (norm_final): LayerNorm((512,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) (1): EncoderLayer( (self_attn): RelPositionMultiHeadedAttention( (linear_q): Linear(in_features=512, out_features=512, bias=True) (linear_k): Linear(in_features=512, out_features=512, bias=True) (linear_v): Linear(in_features=512, out_features=512, bias=True) (linear_out): Linear(in_features=512, out_features=512, bias=True) (dropout): Dropout(p=0.1, inplace=False) (linear_pos): Linear(in_features=512, out_features=512, bias=False) ) (feed_forward): PositionwiseFeedForward( (w_1): Linear(in_features=512, out_features=2048, bias=True) (w_2): Linear(in_features=2048, out_features=512, bias=True) (dropout): Dropout(p=0.1, inplace=False) (activation): Swish() ) (feed_forward_macaron): PositionwiseFeedForward( (w_1): Linear(in_features=512, out_features=2048, bias=True) (w_2): Linear(in_features=2048, out_features=512, bias=True) (dropout): Dropout(p=0.1, inplace=False) (activation): Swish() ) (conv_module): ConvolutionModule( (pointwise_conv1): Conv1d(512, 1024, kernel_size=(1,), stride=(1,)) (depthwise_conv): Conv1d(512, 512, kernel_size=(31,), stride=(1,), padding=(15,), groups=512) (norm): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (pointwise_conv2): Conv1d(512, 512, kernel_size=(1,), stride=(1,)) (activation): Swish() ) (norm_ff): LayerNorm((512,), eps=1e-12, elementwise_affine=True) (norm_mha): LayerNorm((512,), eps=1e-12, elementwise_affine=True) (norm_ff_macaron): LayerNorm((512,), eps=1e-12, elementwise_affine=True) (norm_conv): LayerNorm((512,), eps=1e-12, elementwise_affine=True) (norm_final): LayerNorm((512,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) (2): EncoderLayer( (self_attn): RelPositionMultiHeadedAttention( (linear_q): Linear(in_features=512, out_features=512, bias=True) (linear_k): Linear(in_features=512, out_features=512, bias=True) (linear_v): Linear(in_features=512, out_features=512, bias=True) (linear_out): Linear(in_features=512, out_features=512, bias=True) (dropout): Dropout(p=0.1, inplace=False) (linear_pos): Linear(in_features=512, out_features=512, bias=False) ) (feed_forward): PositionwiseFeedForward( (w_1): Linear(in_features=512, out_features=2048, bias=True) (w_2): Linear(in_features=2048, out_features=512, bias=True) (dropout): Dropout(p=0.1, inplace=False) (activation): Swish() ) (feed_forward_macaron): PositionwiseFeedForward( (w_1): Linear(in_features=512, out_features=2048, bias=True) (w_2): Linear(in_features=2048, out_features=512, bias=True) (dropout): Dropout(p=0.1, inplace=False) (activation): Swish() ) (conv_module): ConvolutionModule( (pointwise_conv1): Conv1d(512, 1024, kernel_size=(1,), stride=(1,)) (depthwise_conv): Conv1d(512, 512, kernel_size=(31,), stride=(1,), padding=(15,), groups=512) (norm): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (pointwise_conv2): Conv1d(512, 512, kernel_size=(1,), stride=(1,)) (activation): Swish() ) (norm_ff): LayerNorm((512,), eps=1e-12, elementwise_affine=True) (norm_mha): LayerNorm((512,), eps=1e-12, elementwise_affine=True) (norm_ff_macaron): LayerNorm((512,), eps=1e-12, elementwise_affine=True) (norm_conv): LayerNorm((512,), eps=1e-12, elementwise_affine=True) (norm_final): LayerNorm((512,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) (3): EncoderLayer( (self_attn): RelPositionMultiHeadedAttention( (linear_q): Linear(in_features=512, out_features=512, bias=True) (linear_k): Linear(in_features=512, out_features=512, bias=True) (linear_v): Linear(in_features=512, out_features=512, bias=True) (linear_out): Linear(in_features=512, out_features=512, bias=True) (dropout): Dropout(p=0.1, inplace=False) (linear_pos): Linear(in_features=512, out_features=512, bias=False) ) (feed_forward): PositionwiseFeedForward( (w_1): Linear(in_features=512, out_features=2048, bias=True) (w_2): Linear(in_features=2048, out_features=512, bias=True) (dropout): Dropout(p=0.1, inplace=False) (activation): Swish() ) (feed_forward_macaron): PositionwiseFeedForward( (w_1): Linear(in_features=512, out_features=2048, bias=True) (w_2): Linear(in_features=2048, out_features=512, bias=True) (dropout): Dropout(p=0.1, inplace=False) (activation): Swish() ) (conv_module): ConvolutionModule( (pointwise_conv1): Conv1d(512, 1024, kernel_size=(1,), stride=(1,)) (depthwise_conv): Conv1d(512, 512, kernel_size=(31,), stride=(1,), padding=(15,), groups=512) (norm): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (pointwise_conv2): Conv1d(512, 512, kernel_size=(1,), stride=(1,)) (activation): Swish() ) (norm_ff): LayerNorm((512,), eps=1e-12, elementwise_affine=True) (norm_mha): LayerNorm((512,), eps=1e-12, elementwise_affine=True) (norm_ff_macaron): LayerNorm((512,), eps=1e-12, elementwise_affine=True) (norm_conv): LayerNorm((512,), eps=1e-12, elementwise_affine=True) (norm_final): LayerNorm((512,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) (4): EncoderLayer( (self_attn): RelPositionMultiHeadedAttention( (linear_q): Linear(in_features=512, out_features=512, bias=True) (linear_k): Linear(in_features=512, out_features=512, bias=True) (linear_v): Linear(in_features=512, out_features=512, bias=True) (linear_out): Linear(in_features=512, out_features=512, bias=True) (dropout): Dropout(p=0.1, inplace=False) (linear_pos): Linear(in_features=512, out_features=512, bias=False) ) (feed_forward): PositionwiseFeedForward( (w_1): Linear(in_features=512, out_features=2048, bias=True) (w_2): Linear(in_features=2048, out_features=512, bias=True) (dropout): Dropout(p=0.1, inplace=False) (activation): Swish() ) (feed_forward_macaron): PositionwiseFeedForward( (w_1): Linear(in_features=512, out_features=2048, bias=True) (w_2): Linear(in_features=2048, out_features=512, bias=True) (dropout): Dropout(p=0.1, inplace=False) (activation): Swish() ) (conv_module): ConvolutionModule( (pointwise_conv1): Conv1d(512, 1024, kernel_size=(1,), stride=(1,)) (depthwise_conv): Conv1d(512, 512, kernel_size=(31,), stride=(1,), padding=(15,), groups=512) (norm): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (pointwise_conv2): Conv1d(512, 512, kernel_size=(1,), stride=(1,)) (activation): Swish() ) (norm_ff): LayerNorm((512,), eps=1e-12, elementwise_affine=True) (norm_mha): LayerNorm((512,), eps=1e-12, elementwise_affine=True) (norm_ff_macaron): LayerNorm((512,), eps=1e-12, elementwise_affine=True) (norm_conv): LayerNorm((512,), eps=1e-12, elementwise_affine=True) (norm_final): LayerNorm((512,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) (5): EncoderLayer( (self_attn): RelPositionMultiHeadedAttention( (linear_q): Linear(in_features=512, out_features=512, bias=True) (linear_k): Linear(in_features=512, out_features=512, bias=True) (linear_v): Linear(in_features=512, out_features=512, bias=True) (linear_out): Linear(in_features=512, out_features=512, bias=True) (dropout): Dropout(p=0.1, inplace=False) (linear_pos): Linear(in_features=512, out_features=512, bias=False) ) (feed_forward): PositionwiseFeedForward( (w_1): Linear(in_features=512, out_features=2048, bias=True) (w_2): Linear(in_features=2048, out_features=512, bias=True) (dropout): Dropout(p=0.1, inplace=False) (activation): Swish() ) (feed_forward_macaron): PositionwiseFeedForward( (w_1): Linear(in_features=512, out_features=2048, bias=True) (w_2): Linear(in_features=2048, out_features=512, bias=True) (dropout): Dropout(p=0.1, inplace=False) (activation): Swish() ) (conv_module): ConvolutionModule( (pointwise_conv1): Conv1d(512, 1024, kernel_size=(1,), stride=(1,)) (depthwise_conv): Conv1d(512, 512, kernel_size=(31,), stride=(1,), padding=(15,), groups=512) (norm): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (pointwise_conv2): Conv1d(512, 512, kernel_size=(1,), stride=(1,)) (activation): Swish() ) (norm_ff): LayerNorm((512,), eps=1e-12, elementwise_affine=True) (norm_mha): LayerNorm((512,), eps=1e-12, elementwise_affine=True) (norm_ff_macaron): LayerNorm((512,), eps=1e-12, elementwise_affine=True) (norm_conv): LayerNorm((512,), eps=1e-12, elementwise_affine=True) (norm_final): LayerNorm((512,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) (6): EncoderLayer( (self_attn): RelPositionMultiHeadedAttention( (linear_q): Linear(in_features=512, out_features=512, bias=True) (linear_k): Linear(in_features=512, out_features=512, bias=True) (linear_v): Linear(in_features=512, out_features=512, bias=True) (linear_out): Linear(in_features=512, out_features=512, bias=True) (dropout): Dropout(p=0.1, inplace=False) (linear_pos): Linear(in_features=512, out_features=512, bias=False) ) (feed_forward): PositionwiseFeedForward( (w_1): Linear(in_features=512, out_features=2048, bias=True) (w_2): Linear(in_features=2048, out_features=512, bias=True) (dropout): Dropout(p=0.1, inplace=False) (activation): Swish() ) (feed_forward_macaron): PositionwiseFeedForward( (w_1): Linear(in_features=512, out_features=2048, bias=True) (w_2): Linear(in_features=2048, out_features=512, bias=True) (dropout): Dropout(p=0.1, inplace=False) (activation): Swish() ) (conv_module): ConvolutionModule( (pointwise_conv1): Conv1d(512, 1024, kernel_size=(1,), stride=(1,)) (depthwise_conv): Conv1d(512, 512, kernel_size=(31,), stride=(1,), padding=(15,), groups=512) (norm): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (pointwise_conv2): Conv1d(512, 512, kernel_size=(1,), stride=(1,)) (activation): Swish() ) (norm_ff): LayerNorm((512,), eps=1e-12, elementwise_affine=True) (norm_mha): LayerNorm((512,), eps=1e-12, elementwise_affine=True) (norm_ff_macaron): LayerNorm((512,), eps=1e-12, elementwise_affine=True) (norm_conv): LayerNorm((512,), eps=1e-12, elementwise_affine=True) (norm_final): LayerNorm((512,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) (7): EncoderLayer( (self_attn): RelPositionMultiHeadedAttention( (linear_q): Linear(in_features=512, out_features=512, bias=True) (linear_k): Linear(in_features=512, out_features=512, bias=True) (linear_v): Linear(in_features=512, out_features=512, bias=True) (linear_out): Linear(in_features=512, out_features=512, bias=True) (dropout): Dropout(p=0.1, inplace=False) (linear_pos): Linear(in_features=512, out_features=512, bias=False) ) (feed_forward): PositionwiseFeedForward( (w_1): Linear(in_features=512, out_features=2048, bias=True) (w_2): Linear(in_features=2048, out_features=512, bias=True) (dropout): Dropout(p=0.1, inplace=False) (activation): Swish() ) (feed_forward_macaron): PositionwiseFeedForward( (w_1): Linear(in_features=512, out_features=2048, bias=True) (w_2): Linear(in_features=2048, out_features=512, bias=True) (dropout): Dropout(p=0.1, inplace=False) (activation): Swish() ) (conv_module): ConvolutionModule( (pointwise_conv1): Conv1d(512, 1024, kernel_size=(1,), stride=(1,)) (depthwise_conv): Conv1d(512, 512, kernel_size=(31,), stride=(1,), padding=(15,), groups=512) (norm): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (pointwise_conv2): Conv1d(512, 512, kernel_size=(1,), stride=(1,)) (activation): Swish() ) (norm_ff): LayerNorm((512,), eps=1e-12, elementwise_affine=True) (norm_mha): LayerNorm((512,), eps=1e-12, elementwise_affine=True) (norm_ff_macaron): LayerNorm((512,), eps=1e-12, elementwise_affine=True) (norm_conv): LayerNorm((512,), eps=1e-12, elementwise_affine=True) (norm_final): LayerNorm((512,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) (8): EncoderLayer( (self_attn): RelPositionMultiHeadedAttention( (linear_q): Linear(in_features=512, out_features=512, bias=True) (linear_k): Linear(in_features=512, out_features=512, bias=True) (linear_v): Linear(in_features=512, out_features=512, bias=True) (linear_out): Linear(in_features=512, out_features=512, bias=True) (dropout): Dropout(p=0.1, inplace=False) (linear_pos): Linear(in_features=512, out_features=512, bias=False) ) (feed_forward): PositionwiseFeedForward( (w_1): Linear(in_features=512, out_features=2048, bias=True) (w_2): Linear(in_features=2048, out_features=512, bias=True) (dropout): Dropout(p=0.1, inplace=False) (activation): Swish() ) (feed_forward_macaron): PositionwiseFeedForward( (w_1): Linear(in_features=512, out_features=2048, bias=True) (w_2): Linear(in_features=2048, out_features=512, bias=True) (dropout): Dropout(p=0.1, inplace=False) (activation): Swish() ) (conv_module): ConvolutionModule( (pointwise_conv1): Conv1d(512, 1024, kernel_size=(1,), stride=(1,)) (depthwise_conv): Conv1d(512, 512, kernel_size=(31,), stride=(1,), padding=(15,), groups=512) (norm): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (pointwise_conv2): Conv1d(512, 512, kernel_size=(1,), stride=(1,)) (activation): Swish() ) (norm_ff): LayerNorm((512,), eps=1e-12, elementwise_affine=True) (norm_mha): LayerNorm((512,), eps=1e-12, elementwise_affine=True) (norm_ff_macaron): LayerNorm((512,), eps=1e-12, elementwise_affine=True) (norm_conv): LayerNorm((512,), eps=1e-12, elementwise_affine=True) (norm_final): LayerNorm((512,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) (9): EncoderLayer( (self_attn): RelPositionMultiHeadedAttention( (linear_q): Linear(in_features=512, out_features=512, bias=True) (linear_k): Linear(in_features=512, out_features=512, bias=True) (linear_v): Linear(in_features=512, out_features=512, bias=True) (linear_out): Linear(in_features=512, out_features=512, bias=True) (dropout): Dropout(p=0.1, inplace=False) (linear_pos): Linear(in_features=512, out_features=512, bias=False) ) (feed_forward): PositionwiseFeedForward( (w_1): Linear(in_features=512, out_features=2048, bias=True) (w_2): Linear(in_features=2048, out_features=512, bias=True) (dropout): Dropout(p=0.1, inplace=False) (activation): Swish() ) (feed_forward_macaron): PositionwiseFeedForward( (w_1): Linear(in_features=512, out_features=2048, bias=True) (w_2): Linear(in_features=2048, out_features=512, bias=True) (dropout): Dropout(p=0.1, inplace=False) (activation): Swish() ) (conv_module): ConvolutionModule( (pointwise_conv1): Conv1d(512, 1024, kernel_size=(1,), stride=(1,)) (depthwise_conv): Conv1d(512, 512, kernel_size=(31,), stride=(1,), padding=(15,), groups=512) (norm): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (pointwise_conv2): Conv1d(512, 512, kernel_size=(1,), stride=(1,)) (activation): Swish() ) (norm_ff): LayerNorm((512,), eps=1e-12, elementwise_affine=True) (norm_mha): LayerNorm((512,), eps=1e-12, elementwise_affine=True) (norm_ff_macaron): LayerNorm((512,), eps=1e-12, elementwise_affine=True) (norm_conv): LayerNorm((512,), eps=1e-12, elementwise_affine=True) (norm_final): LayerNorm((512,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) (10): EncoderLayer( (self_attn): RelPositionMultiHeadedAttention( (linear_q): Linear(in_features=512, out_features=512, bias=True) (linear_k): Linear(in_features=512, out_features=512, bias=True) (linear_v): Linear(in_features=512, out_features=512, bias=True) (linear_out): Linear(in_features=512, out_features=512, bias=True) (dropout): Dropout(p=0.1, inplace=False) (linear_pos): Linear(in_features=512, out_features=512, bias=False) ) (feed_forward): PositionwiseFeedForward( (w_1): Linear(in_features=512, out_features=2048, bias=True) (w_2): Linear(in_features=2048, out_features=512, bias=True) (dropout): Dropout(p=0.1, inplace=False) (activation): Swish() ) (feed_forward_macaron): PositionwiseFeedForward( (w_1): Linear(in_features=512, out_features=2048, bias=True) (w_2): Linear(in_features=2048, out_features=512, bias=True) (dropout): Dropout(p=0.1, inplace=False) (activation): Swish() ) (conv_module): ConvolutionModule( (pointwise_conv1): Conv1d(512, 1024, kernel_size=(1,), stride=(1,)) (depthwise_conv): Conv1d(512, 512, kernel_size=(31,), stride=(1,), padding=(15,), groups=512) (norm): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (pointwise_conv2): Conv1d(512, 512, kernel_size=(1,), stride=(1,)) (activation): Swish() ) (norm_ff): LayerNorm((512,), eps=1e-12, elementwise_affine=True) (norm_mha): LayerNorm((512,), eps=1e-12, elementwise_affine=True) (norm_ff_macaron): LayerNorm((512,), eps=1e-12, elementwise_affine=True) (norm_conv): LayerNorm((512,), eps=1e-12, elementwise_affine=True) (norm_final): LayerNorm((512,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) (11): EncoderLayer( (self_attn): RelPositionMultiHeadedAttention( (linear_q): Linear(in_features=512, out_features=512, bias=True) (linear_k): Linear(in_features=512, out_features=512, bias=True) (linear_v): Linear(in_features=512, out_features=512, bias=True) (linear_out): Linear(in_features=512, out_features=512, bias=True) (dropout): Dropout(p=0.1, inplace=False) (linear_pos): Linear(in_features=512, out_features=512, bias=False) ) (feed_forward): PositionwiseFeedForward( (w_1): Linear(in_features=512, out_features=2048, bias=True) (w_2): Linear(in_features=2048, out_features=512, bias=True) (dropout): Dropout(p=0.1, inplace=False) (activation): Swish() ) (feed_forward_macaron): PositionwiseFeedForward( (w_1): Linear(in_features=512, out_features=2048, bias=True) (w_2): Linear(in_features=2048, out_features=512, bias=True) (dropout): Dropout(p=0.1, inplace=False) (activation): Swish() ) (conv_module): ConvolutionModule( (pointwise_conv1): Conv1d(512, 1024, kernel_size=(1,), stride=(1,)) (depthwise_conv): Conv1d(512, 512, kernel_size=(31,), stride=(1,), padding=(15,), groups=512) (norm): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (pointwise_conv2): Conv1d(512, 512, kernel_size=(1,), stride=(1,)) (activation): Swish() ) (norm_ff): LayerNorm((512,), eps=1e-12, elementwise_affine=True) (norm_mha): LayerNorm((512,), eps=1e-12, elementwise_affine=True) (norm_ff_macaron): LayerNorm((512,), eps=1e-12, elementwise_affine=True) (norm_conv): LayerNorm((512,), eps=1e-12, elementwise_affine=True) (norm_final): LayerNorm((512,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (after_norm): LayerNorm((512,), eps=1e-12, elementwise_affine=True) ) (decoder): TransducerDecoder( (embed): Embedding(600, 512, padding_idx=0) (dropout_embed): Dropout(p=0.2, inplace=False) (decoder): ModuleList( (0): LSTM(512, 512, batch_first=True) ) (dropout_dec): ModuleList( (0): Dropout(p=0.1, inplace=False) ) ) (joint_network): JointNetwork( (lin_enc): Linear(in_features=512, out_features=640, bias=True) (lin_dec): Linear(in_features=512, out_features=640, bias=True) (lin_out): Linear(in_features=640, out_features=600, bias=True) (joint_activation): Tanh() ) (criterion_transducer): RNNTLoss() (ctc): CTC( (ctc_lo): Linear(in_features=512, out_features=600, bias=True) (ctc_loss): CTCLoss() ) ) Model summary: Class Name: ESPnetASRModel Total Number of model parameters: 86.99 M Number of trainable parameters: 86.99 M (100.0%) Size: 347.96 MB Type: torch.float32 [gpu11] 2022-06-03 22:33:13,544 (abs_task:1153) INFO: Optimizer: Adam ( Parameter Group 0 amsgrad: False betas: (0.9, 0.999) eps: 1e-08 initial_lr: 0.0015 lr: 6.000000000000001e-08 weight_decay: 1e-06 ) [gpu11] 2022-06-03 22:33:13,544 (abs_task:1154) INFO: Scheduler: WarmupLR(warmup_steps=25000) [gpu11] 2022-06-03 22:33:13,544 (abs_task:1163) INFO: Saving the configuration in exp/asr_IndEng188h-rnnt-600bpe/config.yaml [gpu11] 2022-06-03 22:33:14,706 (abs_task:1517) INFO: [train] dataset: ESPnetDataset( speech: {"path": "dump/raw/train_English/wav.scp", "type": "sound"} text: {"path": "dump/raw/train_English/text", "type": "text"} preprocess: ) [gpu11] 2022-06-03 22:33:14,706 (abs_task:1518) INFO: [train] Batch sampler: FoldedBatchSampler(N-batch=9503, batch_size=20, shape_files=['exp/asr_stats_raw_en_bpe600/train/speech_shape', 'exp/asr_stats_raw_en_bpe600/train/text_shape.bpe'], sort_in_batch=descending, sort_batch=descending) [gpu11] 2022-06-03 22:33:14,708 (abs_task:1519) INFO: [train] mini-batch sizes summary: N-batch=9503, mean=11.6, min=2, max=20 [gpu11] 2022-06-03 22:33:14,783 (abs_task:1517) INFO: [valid] dataset: ESPnetDataset( speech: {"path": "dump/raw/dev_English/wav.scp", "type": "sound"} text: {"path": "dump/raw/dev_English/text", "type": "text"} preprocess: ) [gpu11] 2022-06-03 22:33:14,783 (abs_task:1518) INFO: [valid] Batch sampler: FoldedBatchSampler(N-batch=281, batch_size=20, shape_files=['exp/asr_stats_raw_en_bpe600/valid/speech_shape', 'exp/asr_stats_raw_en_bpe600/valid/text_shape.bpe'], sort_in_batch=descending, sort_batch=descending) [gpu11] 2022-06-03 22:33:14,783 (abs_task:1519) INFO: [valid] mini-batch sizes summary: N-batch=281, mean=10.7, min=4, max=20 [gpu11] 2022-06-03 22:33:14,887 (trainer:274) INFO: 1/20epoch started /speech/umeshs/espnet-v.202205/tools/anaconda/envs/espnet/lib/python3.8/site-packages/torch/_tensor.py:575: UserWarning: floor_divide is deprecated, and will be removed in a future version of pytorch. It currently rounds toward 0 (like the 'trunc' function NOT 'floor'). This results in incorrect rounding for negative values. To keep the current behavior, use torch.div(a, b, rounding_mode='trunc'), or for actual floor division, use torch.div(a, b, rounding_mode='floor'). (Triggered internally at /opt/conda/conda-bld/pytorch_1631630839582/work/aten/src/ATen/native/BinaryOps.cpp:467.) return torch.floor_divide(self, other) [gpu11] 2022-06-03 22:34:21,803 (trainer:672) INFO: 1epoch:train:1-475batch: iter_time=6.659e-04, forward_time=0.050, loss_ctc=518.976, loss_transducer=549.041, loss=176.183, backward_time=0.048, optim_step_time=0.032, optim0_lr0=3.630e-06, train_time=0.563 [gpu11] 2022-06-03 22:35:28,461 (trainer:672) INFO: 1epoch:train:476-950batch: iter_time=1.491e-04, forward_time=0.049, loss_ctc=157.094, loss_transducer=157.758, loss=51.221, backward_time=0.046, optim_step_time=0.031, optim0_lr0=1.074e-05, train_time=0.561 [gpu11] 2022-06-03 22:36:36,020 (trainer:672) INFO: 1epoch:train:951-1425batch: iter_time=1.786e-04, forward_time=0.050, loss_ctc=138.066, loss_transducer=137.365, loss=44.696, backward_time=0.047, optim_step_time=0.032, optim0_lr0=1.788e-05, train_time=0.569 [gpu11] 2022-06-03 22:37:42,151 (trainer:672) INFO: 1epoch:train:1426-1900batch: iter_time=1.813e-04, forward_time=0.049, loss_ctc=131.613, loss_transducer=131.466, loss=42.737, backward_time=0.047, optim_step_time=0.032, optim0_lr0=2.502e-05, train_time=0.556 [gpu11] 2022-06-03 22:38:49,071 (trainer:672) INFO: 1epoch:train:1901-2375batch: iter_time=1.824e-04, forward_time=0.049, loss_ctc=134.438, loss_transducer=133.640, loss=43.493, backward_time=0.047, optim_step_time=0.032, optim0_lr0=3.213e-05, train_time=0.564 [gpu11] 2022-06-03 22:39:55,664 (trainer:672) INFO: 1epoch:train:2376-2850batch: iter_time=1.468e-04, forward_time=0.049, loss_ctc=131.606, loss_transducer=128.864, loss=42.086, backward_time=0.048, optim_step_time=0.032, optim0_lr0=3.924e-05, train_time=0.561 [gpu11] 2022-06-03 22:41:02,589 (trainer:672) INFO: 1epoch:train:2851-3325batch: iter_time=1.528e-04, forward_time=0.049, loss_ctc=134.813, loss_transducer=129.033, loss=42.369, backward_time=0.048, optim_step_time=0.032, optim0_lr0=4.638e-05, train_time=0.563 [gpu11] 2022-06-03 22:42:09,940 (trainer:672) INFO: 1epoch:train:3326-3800batch: iter_time=1.577e-04, forward_time=0.050, loss_ctc=131.937, loss_transducer=122.440, loss=40.505, backward_time=0.046, optim_step_time=0.032, optim0_lr0=5.352e-05, train_time=0.566 [gpu11] 2022-06-03 22:43:16,431 (trainer:672) INFO: 1epoch:train:3801-4275batch: iter_time=1.528e-04, forward_time=0.050, loss_ctc=125.877, loss_transducer=113.911, loss=37.918, backward_time=0.046, optim_step_time=0.032, optim0_lr0=6.063e-05, train_time=0.560 [gpu11] 2022-06-03 22:44:21,900 (trainer:672) INFO: 1epoch:train:4276-4750batch: iter_time=1.729e-04, forward_time=0.048, loss_ctc=131.862, loss_transducer=116.752, loss=39.078, backward_time=0.044, optim_step_time=0.031, optim0_lr0=6.774e-05, train_time=0.552 [gpu11] 2022-06-03 22:45:28,648 (trainer:672) INFO: 1epoch:train:4751-5225batch: iter_time=1.691e-04, forward_time=0.049, loss_ctc=129.279, loss_transducer=111.466, loss=37.562, backward_time=0.048, optim_step_time=0.032, optim0_lr0=7.488e-05, train_time=0.562 [gpu11] 2022-06-03 22:46:35,527 (trainer:672) INFO: 1epoch:train:5226-5700batch: iter_time=1.559e-04, forward_time=0.050, loss_ctc=123.598, loss_transducer=104.672, loss=35.438, backward_time=0.048, optim_step_time=0.032, optim0_lr0=8.202e-05, train_time=0.563 [gpu11] 2022-06-03 22:47:43,198 (trainer:672) INFO: 1epoch:train:5701-6175batch: iter_time=1.522e-04, forward_time=0.050, loss_ctc=133.330, loss_transducer=111.516, loss=37.879, backward_time=0.047, optim_step_time=0.032, optim0_lr0=8.913e-05, train_time=0.570 [gpu11] 2022-06-03 22:48:50,430 (trainer:672) INFO: 1epoch:train:6176-6650batch: iter_time=1.562e-04, forward_time=0.050, loss_ctc=131.503, loss_transducer=108.244, loss=36.924, backward_time=0.046, optim_step_time=0.032, optim0_lr0=9.624e-05, train_time=0.566 [gpu11] 2022-06-03 22:49:56,650 (trainer:672) INFO: 1epoch:train:6651-7125batch: iter_time=1.546e-04, forward_time=0.049, loss_ctc=123.986, loss_transducer=100.389, loss=34.396, backward_time=0.045, optim_step_time=0.031, optim0_lr0=1.034e-04, train_time=0.557 [gpu11] 2022-06-03 22:51:03,398 (trainer:672) INFO: 1epoch:train:7126-7600batch: iter_time=1.512e-04, forward_time=0.049, loss_ctc=128.408, loss_transducer=103.310, loss=35.458, backward_time=0.045, optim_step_time=0.031, optim0_lr0=1.105e-04, train_time=0.562 [gpu11] 2022-06-03 22:52:09,869 (trainer:672) INFO: 1epoch:train:7601-8075batch: iter_time=1.511e-04, forward_time=0.049, loss_ctc=127.073, loss_transducer=101.454, loss=34.894, backward_time=0.045, optim_step_time=0.032, optim0_lr0=1.176e-04, train_time=0.560 [gpu11] 2022-06-03 22:53:16,753 (trainer:672) INFO: 1epoch:train:8076-8550batch: iter_time=1.546e-04, forward_time=0.049, loss_ctc=128.073, loss_transducer=100.625, loss=34.762, backward_time=0.045, optim_step_time=0.032, optim0_lr0=1.247e-04, train_time=0.563 [gpu11] 2022-06-03 22:54:22,462 (trainer:672) INFO: 1epoch:train:8551-9025batch: iter_time=1.535e-04, forward_time=0.048, loss_ctc=121.737, loss_transducer=94.768, loss=32.822, backward_time=0.045, optim_step_time=0.032, optim0_lr0=1.319e-04, train_time=0.553 [gpu11] 2022-06-03 22:55:29,968 (trainer:672) INFO: 1epoch:train:9026-9500batch: iter_time=1.561e-04, forward_time=0.049, loss_ctc=127.467, loss_transducer=98.143, loss=34.096, backward_time=0.046, optim_step_time=0.032, optim0_lr0=1.390e-04, train_time=0.568 /speech/umeshs/espnet-v.202205/espnet2/train/reporter.py:79: UserWarning: No valid stats found warnings.warn("No valid stats found") [gpu11] 2022-06-03 22:55:45,234 (trainer:328) INFO: 1epoch results: [train] iter_time=1.847e-04, forward_time=0.049, loss_ctc=150.153, loss_transducer=137.335, loss=45.595, backward_time=0.046, optim_step_time=0.032, optim0_lr0=7.134e-05, train_time=0.562, time=22 minutes and 15.8 seconds, total_count=9503, gpu_max_cached_mem_GB=15.092, [valid] loss_ctc=152.638, cer_ctc=0.951, loss_transducer=118.076, cer_transducer=nan, wer_transducer=nan, loss=163.867, time=14.53 seconds, total_count=281, gpu_max_cached_mem_GB=15.092 [gpu11] 2022-06-03 22:55:47,849 (trainer:376) INFO: The best model has been updated: valid.loss [gpu11] 2022-06-03 22:55:47,849 (trainer:262) INFO: 2/20epoch started. Estimated time to finish: 7 hours, 8 minutes and 26.28 seconds [gpu11] 2022-06-03 22:56:55,840 (trainer:672) INFO: 2epoch:train:1-475batch: iter_time=5.628e-04, forward_time=0.050, loss_ctc=125.081, loss_transducer=94.710, loss=33.059, backward_time=0.046, optim_step_time=0.031, optim0_lr0=1.461e-04, train_time=0.572 [gpu11] 2022-06-03 22:58:04,488 (trainer:672) INFO: 2epoch:train:476-950batch: iter_time=1.567e-04, forward_time=0.050, loss_ctc=123.522, loss_transducer=92.480, loss=32.384, backward_time=0.050, optim_step_time=0.032, optim0_lr0=1.532e-04, train_time=0.578 [gpu11] 2022-06-03 22:59:11,800 (trainer:672) INFO: 2epoch:train:951-1425batch: iter_time=1.496e-04, forward_time=0.049, loss_ctc=121.226, loss_transducer=89.324, loss=31.423, backward_time=0.049, optim_step_time=0.031, optim0_lr0=1.604e-04, train_time=0.567 [gpu11] 2022-06-03 23:00:18,985 (trainer:672) INFO: 2epoch:train:1426-1900batch: iter_time=1.503e-04, forward_time=0.049, loss_ctc=123.056, loss_transducer=89.848, loss=31.691, backward_time=0.046, optim_step_time=0.031, optim0_lr0=1.675e-04, train_time=0.565 [gpu11] 2022-06-03 23:01:26,499 (trainer:672) INFO: 2epoch:train:1901-2375batch: iter_time=1.661e-04, forward_time=0.050, loss_ctc=128.164, loss_transducer=93.147, loss=32.899, backward_time=0.048, optim_step_time=0.031, optim0_lr0=1.746e-04, train_time=0.569 [gpu11] 2022-06-03 23:02:33,808 (trainer:672) INFO: 2epoch:train:2376-2850batch: iter_time=2.057e-04, forward_time=0.050, loss_ctc=125.491, loss_transducer=90.048, loss=31.924, backward_time=0.045, optim_step_time=0.031, optim0_lr0=1.817e-04, train_time=0.566 [gpu11] 2022-06-03 23:03:41,218 (trainer:672) INFO: 2epoch:train:2851-3325batch: iter_time=2.040e-04, forward_time=0.050, loss_ctc=125.418, loss_transducer=87.285, loss=31.228, backward_time=0.046, optim_step_time=0.031, optim0_lr0=1.889e-04, train_time=0.568 [gpu11] 2022-06-03 23:04:47,640 (trainer:672) INFO: 2epoch:train:3326-3800batch: iter_time=1.505e-04, forward_time=0.049, loss_ctc=126.706, loss_transducer=87.472, loss=31.371, backward_time=0.046, optim_step_time=0.031, optim0_lr0=1.960e-04, train_time=0.559 [gpu11] 2022-06-03 23:05:55,292 (trainer:672) INFO: 2epoch:train:3801-4275batch: iter_time=1.474e-04, forward_time=0.050, loss_ctc=133.374, loss_transducer=91.380, loss=32.848, backward_time=0.047, optim_step_time=0.031, optim0_lr0=2.031e-04, train_time=0.570 [gpu11] 2022-06-03 23:07:01,893 (trainer:672) INFO: 2epoch:train:4276-4750batch: iter_time=1.449e-04, forward_time=0.050, loss_ctc=127.299, loss_transducer=84.942, loss=30.783, backward_time=0.045, optim_step_time=0.031, optim0_lr0=2.102e-04, train_time=0.561 [gpu11] 2022-06-03 23:08:08,882 (trainer:672) INFO: 2epoch:train:4751-5225batch: iter_time=1.437e-04, forward_time=0.050, loss_ctc=131.192, loss_transducer=87.651, loss=31.752, backward_time=0.045, optim_step_time=0.032, optim0_lr0=2.174e-04, train_time=0.564 [gpu11] 2022-06-03 23:09:16,629 (trainer:672) INFO: 2epoch:train:5226-5700batch: iter_time=1.557e-04, forward_time=0.051, loss_ctc=127.636, loss_transducer=84.621, loss=30.728, backward_time=0.047, optim_step_time=0.032, optim0_lr0=2.245e-04, train_time=0.570 [gpu11] 2022-06-03 23:10:24,137 (trainer:672) INFO: 2epoch:train:5701-6175batch: iter_time=1.555e-04, forward_time=0.050, loss_ctc=131.058, loss_transducer=87.312, loss=31.657, backward_time=0.046, optim_step_time=0.032, optim0_lr0=2.316e-04, train_time=0.569 [gpu11] 2022-06-03 23:11:31,648 (trainer:672) INFO: 2epoch:train:6176-6650batch: iter_time=1.515e-04, forward_time=0.050, loss_ctc=123.849, loss_transducer=79.878, loss=29.258, backward_time=0.046, optim_step_time=0.032, optim0_lr0=2.387e-04, train_time=0.568 [gpu11] 2022-06-03 23:12:38,245 (trainer:672) INFO: 2epoch:train:6651-7125batch: iter_time=1.523e-04, forward_time=0.049, loss_ctc=125.726, loss_transducer=81.125, loss=29.711, backward_time=0.047, optim_step_time=0.032, optim0_lr0=2.459e-04, train_time=0.560 [gpu11] 2022-06-03 23:13:44,710 (trainer:672) INFO: 2epoch:train:7126-7600batch: iter_time=1.510e-04, forward_time=0.049, loss_ctc=127.353, loss_transducer=81.690, loss=29.974, backward_time=0.045, optim_step_time=0.031, optim0_lr0=2.530e-04, train_time=0.559 [gpu11] 2022-06-03 23:14:51,883 (trainer:672) INFO: 2epoch:train:7601-8075batch: iter_time=1.515e-04, forward_time=0.050, loss_ctc=125.757, loss_transducer=80.337, loss=29.516, backward_time=0.045, optim_step_time=0.031, optim0_lr0=2.601e-04, train_time=0.565 [gpu11] 2022-06-03 23:15:58,435 (trainer:672) INFO: 2epoch:train:8076-8550batch: iter_time=1.553e-04, forward_time=0.049, loss_ctc=131.955, loss_transducer=84.539, loss=31.031, backward_time=0.045, optim_step_time=0.031, optim0_lr0=2.672e-04, train_time=0.561 [gpu11] 2022-06-03 23:17:04,310 (trainer:672) INFO: 2epoch:train:8551-9025batch: iter_time=1.557e-04, forward_time=0.048, loss_ctc=132.983, loss_transducer=85.231, loss=31.281, backward_time=0.045, optim_step_time=0.031, optim0_lr0=2.744e-04, train_time=0.554 [gpu11] 2022-06-03 23:18:10,492 (trainer:672) INFO: 2epoch:train:9026-9500batch: iter_time=1.561e-04, forward_time=0.049, loss_ctc=130.984, loss_transducer=82.871, loss=30.541, backward_time=0.045, optim_step_time=0.031, optim0_lr0=2.815e-04, train_time=0.557 /speech/umeshs/espnet-v.202205/espnet2/train/reporter.py:79: UserWarning: No valid stats found warnings.warn("No valid stats found") [gpu11] 2022-06-03 23:18:25,682 (trainer:328) INFO: 2epoch results: [train] iter_time=1.783e-04, forward_time=0.050, loss_ctc=127.351, loss_transducer=86.783, loss=31.247, backward_time=0.046, optim_step_time=0.032, optim0_lr0=2.138e-04, train_time=0.565, time=22 minutes and 23.34 seconds, total_count=19006, gpu_max_cached_mem_GB=15.092, [valid] loss_ctc=166.679, cer_ctc=0.976, loss_transducer=100.546, cer_transducer=nan, wer_transducer=nan, loss=150.550, time=14.49 seconds, total_count=562, gpu_max_cached_mem_GB=15.092 [gpu11] 2022-06-03 23:18:28,507 (trainer:376) INFO: The best model has been updated: valid.loss [gpu11] 2022-06-03 23:18:28,508 (trainer:262) INFO: 3/20epoch started. Estimated time to finish: 6 hours, 47 minutes and 2.58 seconds [gpu11] 2022-06-03 23:19:35,570 (trainer:672) INFO: 3epoch:train:1-475batch: iter_time=6.731e-04, forward_time=0.049, loss_ctc=129.722, loss_transducer=80.760, loss=29.919, backward_time=0.045, optim_step_time=0.032, optim0_lr0=2.886e-04, train_time=0.565 [gpu11] 2022-06-03 23:20:41,197 (trainer:672) INFO: 3epoch:train:476-950batch: iter_time=1.555e-04, forward_time=0.048, loss_ctc=125.429, loss_transducer=78.326, loss=28.989, backward_time=0.045, optim_step_time=0.032, optim0_lr0=2.957e-04, train_time=0.552 [gpu11] 2022-06-03 23:21:48,606 (trainer:672) INFO: 3epoch:train:951-1425batch: iter_time=1.582e-04, forward_time=0.049, loss_ctc=131.908, loss_transducer=81.948, loss=30.380, backward_time=0.045, optim_step_time=0.032, optim0_lr0=3.029e-04, train_time=0.567 [gpu11] 2022-06-03 23:22:54,706 (trainer:672) INFO: 3epoch:train:1426-1900batch: iter_time=1.542e-04, forward_time=0.049, loss_ctc=130.626, loss_transducer=81.193, loss=30.095, backward_time=0.045, optim_step_time=0.032, optim0_lr0=3.100e-04, train_time=0.556 [gpu11] 2022-06-03 23:24:02,241 (trainer:672) INFO: 3epoch:train:1901-2375batch: iter_time=1.995e-04, forward_time=0.050, loss_ctc=132.422, loss_transducer=80.927, loss=30.163, backward_time=0.045, optim_step_time=0.032, optim0_lr0=3.171e-04, train_time=0.569 [gpu11] 2022-06-03 23:25:08,780 (trainer:672) INFO: 3epoch:train:2376-2850batch: iter_time=1.913e-04, forward_time=0.049, loss_ctc=129.498, loss_transducer=79.639, loss=29.622, backward_time=0.046, optim_step_time=0.032, optim0_lr0=3.242e-04, train_time=0.560 [gpu11] 2022-06-03 23:26:15,438 (trainer:672) INFO: 3epoch:train:2851-3325batch: iter_time=2.083e-04, forward_time=0.049, loss_ctc=129.186, loss_transducer=78.812, loss=29.392, backward_time=0.045, optim_step_time=0.032, optim0_lr0=3.314e-04, train_time=0.561 [gpu11] 2022-06-03 23:27:22,818 (trainer:672) INFO: 3epoch:train:3326-3800batch: iter_time=1.692e-04, forward_time=0.050, loss_ctc=127.689, loss_transducer=77.235, loss=28.885, backward_time=0.045, optim_step_time=0.032, optim0_lr0=3.385e-04, train_time=0.567 [gpu11] 2022-06-03 23:28:29,933 (trainer:672) INFO: 3epoch:train:3801-4275batch: iter_time=1.606e-04, forward_time=0.050, loss_ctc=124.419, loss_transducer=74.896, loss=28.055, backward_time=0.047, optim_step_time=0.032, optim0_lr0=3.456e-04, train_time=0.565 [gpu11] 2022-06-03 23:29:37,013 (trainer:672) INFO: 3epoch:train:4276-4750batch: iter_time=1.523e-04, forward_time=0.049, loss_ctc=125.142, loss_transducer=74.888, loss=28.108, backward_time=0.047, optim_step_time=0.032, optim0_lr0=3.527e-04, train_time=0.565 [gpu11] 2022-06-03 23:30:44,913 (trainer:672) INFO: 3epoch:train:4751-5225batch: iter_time=1.934e-04, forward_time=0.050, loss_ctc=128.343, loss_transducer=75.765, loss=28.567, backward_time=0.047, optim_step_time=0.032, optim0_lr0=3.599e-04, train_time=0.571 [gpu11] 2022-06-03 23:31:52,695 (trainer:672) INFO: 3epoch:train:5226-5700batch: iter_time=1.951e-04, forward_time=0.050, loss_ctc=126.790, loss_transducer=75.524, loss=28.390, backward_time=0.046, optim_step_time=0.031, optim0_lr0=3.670e-04, train_time=0.570 [gpu11] 2022-06-03 23:33:00,231 (trainer:672) INFO: 3epoch:train:5701-6175batch: iter_time=1.817e-04, forward_time=0.050, loss_ctc=133.225, loss_transducer=80.143, loss=30.028, backward_time=0.047, optim_step_time=0.032, optim0_lr0=3.741e-04, train_time=0.569 [gpu11] 2022-06-03 23:34:08,589 (trainer:672) INFO: 3epoch:train:6176-6650batch: iter_time=1.480e-04, forward_time=0.051, loss_ctc=127.696, loss_transducer=75.543, loss=28.463, backward_time=0.046, optim_step_time=0.032, optim0_lr0=3.812e-04, train_time=0.576 [gpu11] 2022-06-03 23:35:15,695 (trainer:672) INFO: 3epoch:train:6651-7125batch: iter_time=1.560e-04, forward_time=0.050, loss_ctc=131.069, loss_transducer=77.523, loss=29.211, backward_time=0.046, optim_step_time=0.032, optim0_lr0=3.884e-04, train_time=0.565 [gpu11] 2022-06-03 23:36:22,350 (trainer:672) INFO: 3epoch:train:7126-7600batch: iter_time=1.541e-04, forward_time=0.050, loss_ctc=126.121, loss_transducer=75.244, loss=28.270, backward_time=0.046, optim_step_time=0.032, optim0_lr0=3.955e-04, train_time=0.561 [gpu11] 2022-06-03 23:37:30,192 (trainer:672) INFO: 3epoch:train:7601-8075batch: iter_time=1.515e-04, forward_time=0.050, loss_ctc=127.681, loss_transducer=75.462, loss=28.442, backward_time=0.048, optim_step_time=0.032, optim0_lr0=4.026e-04, train_time=0.571 [gpu11] 2022-06-03 23:38:37,792 (trainer:672) INFO: 3epoch:train:8076-8550batch: iter_time=1.631e-04, forward_time=0.050, loss_ctc=130.321, loss_transducer=76.701, loss=28.949, backward_time=0.047, optim_step_time=0.032, optim0_lr0=4.097e-04, train_time=0.569 [gpu11] 2022-06-03 23:39:44,824 (trainer:672) INFO: 3epoch:train:8551-9025batch: iter_time=1.474e-04, forward_time=0.049, loss_ctc=124.845, loss_transducer=72.159, loss=27.403, backward_time=0.047, optim_step_time=0.032, optim0_lr0=4.169e-04, train_time=0.564 [gpu11] 2022-06-03 23:40:52,376 (trainer:672) INFO: 3epoch:train:9026-9500batch: iter_time=1.515e-04, forward_time=0.049, loss_ctc=126.901, loss_transducer=73.383, loss=27.863, backward_time=0.047, optim_step_time=0.032, optim0_lr0=4.240e-04, train_time=0.568 [gpu11] 2022-06-03 23:41:07,586 (trainer:328) INFO: 3epoch results: [train] iter_time=1.932e-04, forward_time=0.050, loss_ctc=128.396, loss_transducer=77.259, loss=28.944, backward_time=0.046, optim_step_time=0.032, optim0_lr0=3.563e-04, train_time=0.566, time=22 minutes and 24.59 seconds, total_count=28509, gpu_max_cached_mem_GB=15.092, [valid] loss_ctc=165.865, cer_ctc=0.975, loss_transducer=91.813, cer_transducer=nan, wer_transducer=nan, loss=141.573, time=14.48 seconds, total_count=843, gpu_max_cached_mem_GB=15.092 [gpu11] 2022-06-03 23:41:10,441 (trainer:376) INFO: The best model has been updated: valid.loss [gpu11] 2022-06-03 23:41:10,442 (trainer:262) INFO: 4/20epoch started. Estimated time to finish: 6 hours, 24 minutes and 54.81 seconds [gpu11] 2022-06-03 23:42:15,677 (trainer:672) INFO: 4epoch:train:1-475batch: iter_time=6.658e-04, forward_time=0.049, loss_ctc=127.232, loss_transducer=73.483, loss=27.913, backward_time=0.044, optim_step_time=0.032, optim0_lr0=4.311e-04, train_time=0.549 [gpu11] 2022-06-03 23:43:23,317 (trainer:672) INFO: 4epoch:train:476-950batch: iter_time=1.528e-04, forward_time=0.050, loss_ctc=132.947, loss_transducer=77.265, loss=29.287, backward_time=0.046, optim_step_time=0.032, optim0_lr0=4.382e-04, train_time=0.569 [gpu11] 2022-06-03 23:44:30,105 (trainer:672) INFO: 4epoch:train:951-1425batch: iter_time=1.506e-04, forward_time=0.049, loss_ctc=129.527, loss_transducer=74.733, loss=28.398, backward_time=0.046, optim_step_time=0.032, optim0_lr0=4.454e-04, train_time=0.562 [gpu11] 2022-06-03 23:45:36,523 (trainer:672) INFO: 4epoch:train:1426-1900batch: iter_time=1.508e-04, forward_time=0.049, loss_ctc=128.751, loss_transducer=73.261, loss=27.972, backward_time=0.045, optim_step_time=0.032, optim0_lr0=4.525e-04, train_time=0.559 [gpu11] 2022-06-03 23:46:43,042 (trainer:672) INFO: 4epoch:train:1901-2375batch: iter_time=1.446e-04, forward_time=0.049, loss_ctc=122.230, loss_transducer=68.337, loss=26.251, backward_time=0.045, optim_step_time=0.032, optim0_lr0=4.596e-04, train_time=0.561 [gpu11] 2022-06-03 23:47:49,749 (trainer:672) INFO: 4epoch:train:2376-2850batch: iter_time=1.541e-04, forward_time=0.050, loss_ctc=126.814, loss_transducer=71.545, loss=27.397, backward_time=0.045, optim_step_time=0.032, optim0_lr0=4.667e-04, train_time=0.561 [gpu11] 2022-06-03 23:48:56,449 (trainer:672) INFO: 4epoch:train:2851-3325batch: iter_time=1.517e-04, forward_time=0.049, loss_ctc=130.783, loss_transducer=75.005, loss=28.560, backward_time=0.045, optim_step_time=0.032, optim0_lr0=4.739e-04, train_time=0.561 [gpu11] 2022-06-03 23:50:02,532 (trainer:672) INFO: 4epoch:train:3326-3800batch: iter_time=1.639e-04, forward_time=0.049, loss_ctc=123.096, loss_transducer=69.852, loss=26.695, backward_time=0.045, optim_step_time=0.032, optim0_lr0=4.810e-04, train_time=0.556 [gpu11] 2022-06-03 23:51:08,551 (trainer:672) INFO: 4epoch:train:3801-4275batch: iter_time=1.607e-04, forward_time=0.049, loss_ctc=126.446, loss_transducer=71.703, loss=27.409, backward_time=0.044, optim_step_time=0.031, optim0_lr0=4.881e-04, train_time=0.556 [gpu11] 2022-06-03 23:52:16,778 (trainer:672) INFO: 4epoch:train:4276-4750batch: iter_time=1.693e-04, forward_time=0.052, loss_ctc=128.073, loss_transducer=72.832, loss=27.814, backward_time=0.047, optim_step_time=0.033, optim0_lr0=4.952e-04, train_time=0.574 [gpu11] 2022-06-03 23:53:23,753 (trainer:672) INFO: 4epoch:train:4751-5225batch: iter_time=1.676e-04, forward_time=0.050, loss_ctc=122.920, loss_transducer=68.512, loss=26.347, backward_time=0.046, optim_step_time=0.032, optim0_lr0=5.024e-04, train_time=0.564 [gpu11] 2022-06-03 23:54:30,986 (trainer:672) INFO: 4epoch:train:5226-5700batch: iter_time=1.675e-04, forward_time=0.049, loss_ctc=124.369, loss_transducer=69.622, loss=26.733, backward_time=0.045, optim_step_time=0.032, optim0_lr0=5.095e-04, train_time=0.566 [gpu11] 2022-06-03 23:55:38,841 (trainer:672) INFO: 4epoch:train:5701-6175batch: iter_time=1.677e-04, forward_time=0.050, loss_ctc=129.730, loss_transducer=73.052, loss=27.993, backward_time=0.046, optim_step_time=0.032, optim0_lr0=5.166e-04, train_time=0.572 [gpu11] 2022-06-03 23:56:47,813 (trainer:672) INFO: 4epoch:train:6176-6650batch: iter_time=1.718e-04, forward_time=0.052, loss_ctc=132.737, loss_transducer=75.180, loss=28.750, backward_time=0.046, optim_step_time=0.033, optim0_lr0=5.237e-04, train_time=0.580 [gpu11] 2022-06-03 23:57:56,015 (trainer:672) INFO: 4epoch:train:6651-7125batch: iter_time=1.769e-04, forward_time=0.051, loss_ctc=133.115, loss_transducer=74.816, loss=28.687, backward_time=0.046, optim_step_time=0.033, optim0_lr0=5.309e-04, train_time=0.574 [gpu11] 2022-06-03 23:59:03,527 (trainer:672) INFO: 4epoch:train:7126-7600batch: iter_time=1.596e-04, forward_time=0.050, loss_ctc=130.187, loss_transducer=73.050, loss=28.027, backward_time=0.046, optim_step_time=0.033, optim0_lr0=5.380e-04, train_time=0.568 [gpu11] 2022-06-04 00:00:10,322 (trainer:672) INFO: 4epoch:train:7601-8075batch: iter_time=1.589e-04, forward_time=0.050, loss_ctc=123.194, loss_transducer=67.978, loss=26.234, backward_time=0.045, optim_step_time=0.032, optim0_lr0=5.451e-04, train_time=0.563 [gpu11] 2022-06-04 00:01:17,273 (trainer:672) INFO: 4epoch:train:8076-8550batch: iter_time=1.543e-04, forward_time=0.050, loss_ctc=128.415, loss_transducer=71.729, loss=27.563, backward_time=0.045, optim_step_time=0.032, optim0_lr0=5.522e-04, train_time=0.563 [gpu11] 2022-06-04 00:02:25,274 (trainer:672) INFO: 4epoch:train:8551-9025batch: iter_time=1.569e-04, forward_time=0.050, loss_ctc=128.154, loss_transducer=72.241, loss=27.672, backward_time=0.046, optim_step_time=0.032, optim0_lr0=5.594e-04, train_time=0.573 [gpu11] 2022-06-04 00:03:32,853 (trainer:672) INFO: 4epoch:train:9026-9500batch: iter_time=1.572e-04, forward_time=0.050, loss_ctc=130.082, loss_transducer=72.552, loss=27.894, backward_time=0.046, optim_step_time=0.032, optim0_lr0=5.665e-04, train_time=0.569 [gpu11] 2022-06-04 00:03:48,047 (trainer:328) INFO: 4epoch results: [train] iter_time=1.851e-04, forward_time=0.050, loss_ctc=127.895, loss_transducer=72.301, loss=27.667, backward_time=0.045, optim_step_time=0.032, optim0_lr0=4.988e-04, train_time=0.565, time=22 minutes and 23.17 seconds, total_count=38012, gpu_max_cached_mem_GB=15.092, [valid] loss_ctc=166.041, cer_ctc=0.991, loss_transducer=85.877, cer_transducer=nan, wer_transducer=nan, loss=135.690, time=14.43 seconds, total_count=1124, gpu_max_cached_mem_GB=15.092 [gpu11] 2022-06-04 00:03:50,891 (trainer:376) INFO: The best model has been updated: valid.loss [gpu11] 2022-06-04 00:03:50,925 (trainer:430) INFO: The model files were removed: exp/asr_IndEng188h-rnnt-600bpe/1epoch.pth [gpu11] 2022-06-04 00:03:50,925 (trainer:262) INFO: 5/20epoch started. Estimated time to finish: 6 hours, 2 minutes and 24.15 seconds [gpu11] 2022-06-04 00:04:57,196 (trainer:672) INFO: 5epoch:train:1-475batch: iter_time=6.369e-04, forward_time=0.049, loss_ctc=125.669, loss_transducer=67.847, loss=26.387, backward_time=0.046, optim_step_time=0.033, optim0_lr0=5.736e-04, train_time=0.558 [gpu11] 2022-06-04 00:06:04,703 (trainer:672) INFO: 5epoch:train:476-950batch: iter_time=1.698e-04, forward_time=0.050, loss_ctc=126.754, loss_transducer=69.247, loss=26.818, backward_time=0.046, optim_step_time=0.033, optim0_lr0=5.807e-04, train_time=0.568 [gpu11] 2022-06-04 00:07:13,453 (trainer:672) INFO: 5epoch:train:951-1425batch: iter_time=1.897e-04, forward_time=0.053, loss_ctc=128.433, loss_transducer=69.295, loss=26.956, backward_time=0.048, optim_step_time=0.033, optim0_lr0=5.879e-04, train_time=0.579 [gpu11] 2022-06-04 00:08:21,484 (trainer:672) INFO: 5epoch:train:1426-1900batch: iter_time=1.844e-04, forward_time=0.051, loss_ctc=127.565, loss_transducer=70.447, loss=27.179, backward_time=0.047, optim_step_time=0.033, optim0_lr0=5.950e-04, train_time=0.572 [gpu11] 2022-06-04 00:09:28,612 (trainer:672) INFO: 5epoch:train:1901-2375batch: iter_time=1.904e-04, forward_time=0.050, loss_ctc=128.137, loss_transducer=69.412, loss=26.963, backward_time=0.047, optim_step_time=0.032, optim0_lr0=6.021e-04, train_time=0.566 [gpu11] 2022-06-04 00:10:35,248 (trainer:672) INFO: 5epoch:train:2376-2850batch: iter_time=1.602e-04, forward_time=0.050, loss_ctc=125.348, loss_transducer=68.408, loss=26.503, backward_time=0.045, optim_step_time=0.032, optim0_lr0=6.092e-04, train_time=0.560 [gpu11] 2022-06-04 00:11:42,000 (trainer:672) INFO: 5epoch:train:2851-3325batch: iter_time=1.496e-04, forward_time=0.049, loss_ctc=126.696, loss_transducer=68.250, loss=26.565, backward_time=0.045, optim_step_time=0.032, optim0_lr0=6.164e-04, train_time=0.562 [gpu11] 2022-06-04 00:12:48,713 (trainer:672) INFO: 5epoch:train:3326-3800batch: iter_time=1.610e-04, forward_time=0.049, loss_ctc=127.277, loss_transducer=68.848, loss=26.758, backward_time=0.045, optim_step_time=0.032, optim0_lr0=6.235e-04, train_time=0.561 [gpu11] 2022-06-04 00:13:54,977 (trainer:672) INFO: 5epoch:train:3801-4275batch: iter_time=1.544e-04, forward_time=0.049, loss_ctc=129.844, loss_transducer=70.276, loss=27.307, backward_time=0.045, optim_step_time=0.032, optim0_lr0=6.306e-04, train_time=0.558 [gpu11] 2022-06-04 00:15:01,549 (trainer:672) INFO: 5epoch:train:4276-4750batch: iter_time=1.487e-04, forward_time=0.049, loss_ctc=127.588, loss_transducer=68.871, loss=26.787, backward_time=0.045, optim_step_time=0.032, optim0_lr0=6.377e-04, train_time=0.560 [gpu11] 2022-06-04 00:16:08,583 (trainer:672) INFO: 5epoch:train:4751-5225batch: iter_time=1.465e-04, forward_time=0.049, loss_ctc=126.761, loss_transducer=68.282, loss=26.578, backward_time=0.045, optim_step_time=0.032, optim0_lr0=6.449e-04, train_time=0.564 [gpu11] 2022-06-04 00:17:15,889 (trainer:672) INFO: 5epoch:train:5226-5700batch: iter_time=1.539e-04, forward_time=0.049, loss_ctc=132.313, loss_transducer=71.394, loss=27.772, backward_time=0.045, optim_step_time=0.032, optim0_lr0=6.520e-04, train_time=0.566 [gpu11] 2022-06-04 00:18:22,726 (trainer:672) INFO: 5epoch:train:5701-6175batch: iter_time=1.575e-04, forward_time=0.049, loss_ctc=127.053, loss_transducer=68.320, loss=26.609, backward_time=0.045, optim_step_time=0.032, optim0_lr0=6.591e-04, train_time=0.563 [gpu11] 2022-06-04 00:19:29,290 (trainer:672) INFO: 5epoch:train:6176-6650batch: iter_time=1.631e-04, forward_time=0.049, loss_ctc=128.257, loss_transducer=69.754, loss=27.058, backward_time=0.045, optim_step_time=0.032, optim0_lr0=6.662e-04, train_time=0.561 [gpu11] 2022-06-04 00:20:36,350 (trainer:672) INFO: 5epoch:train:6651-7125batch: iter_time=1.579e-04, forward_time=0.049, loss_ctc=129.439, loss_transducer=70.148, loss=27.245, backward_time=0.045, optim_step_time=0.032, optim0_lr0=6.734e-04, train_time=0.564 [gpu11] 2022-06-04 00:21:43,369 (trainer:672) INFO: 5epoch:train:7126-7600batch: iter_time=1.765e-04, forward_time=0.049, loss_ctc=128.379, loss_transducer=69.173, loss=26.922, backward_time=0.045, optim_step_time=0.032, optim0_lr0=6.805e-04, train_time=0.564 [gpu11] 2022-06-04 00:22:50,771 (trainer:672) INFO: 5epoch:train:7601-8075batch: iter_time=1.894e-04, forward_time=0.049, loss_ctc=125.815, loss_transducer=67.933, loss=26.419, backward_time=0.046, optim_step_time=0.032, optim0_lr0=6.876e-04, train_time=0.567 [gpu11] 2022-06-04 00:23:57,483 (trainer:672) INFO: 5epoch:train:8076-8550batch: iter_time=1.721e-04, forward_time=0.049, loss_ctc=126.823, loss_transducer=67.863, loss=26.478, backward_time=0.046, optim_step_time=0.032, optim0_lr0=6.947e-04, train_time=0.562 [gpu11] 2022-06-04 00:25:05,193 (trainer:672) INFO: 5epoch:train:8551-9025batch: iter_time=1.864e-04, forward_time=0.050, loss_ctc=128.598, loss_transducer=69.068, loss=26.912, backward_time=0.046, optim_step_time=0.032, optim0_lr0=7.019e-04, train_time=0.570 [gpu11] 2022-06-04 00:26:12,750 (trainer:672) INFO: 5epoch:train:9026-9500batch: iter_time=1.801e-04, forward_time=0.050, loss_ctc=125.403, loss_transducer=65.554, loss=25.794, backward_time=0.046, optim_step_time=0.032, optim0_lr0=7.090e-04, train_time=0.569 [gpu11] 2022-06-04 00:26:27,914 (trainer:328) INFO: 5epoch results: [train] iter_time=1.914e-04, forward_time=0.050, loss_ctc=127.601, loss_transducer=68.913, loss=26.798, backward_time=0.046, optim_step_time=0.032, optim0_lr0=6.413e-04, train_time=0.565, time=22 minutes and 22.55 seconds, total_count=47515, gpu_max_cached_mem_GB=15.092, [valid] loss_ctc=160.827, cer_ctc=0.977, loss_transducer=80.918, cer_transducer=nan, wer_transducer=nan, loss=129.166, time=14.44 seconds, total_count=1405, gpu_max_cached_mem_GB=15.092 [gpu11] 2022-06-04 00:26:30,719 (trainer:376) INFO: The best model has been updated: valid.loss [gpu11] 2022-06-04 00:26:30,748 (trainer:430) INFO: The model files were removed: exp/asr_IndEng188h-rnnt-600bpe/2epoch.pth [gpu11] 2022-06-04 00:26:30,748 (trainer:262) INFO: 6/20epoch started. Estimated time to finish: 5 hours, 39 minutes and 47.58 seconds [gpu11] 2022-06-04 00:27:37,330 (trainer:672) INFO: 6epoch:train:1-475batch: iter_time=6.531e-04, forward_time=0.049, loss_ctc=128.956, loss_transducer=67.595, loss=26.570, backward_time=0.045, optim_step_time=0.032, optim0_lr0=7.161e-04, train_time=0.561 [gpu11] 2022-06-04 00:28:44,470 (trainer:672) INFO: 6epoch:train:476-950batch: iter_time=1.577e-04, forward_time=0.050, loss_ctc=130.583, loss_transducer=68.921, loss=27.024, backward_time=0.045, optim_step_time=0.032, optim0_lr0=7.232e-04, train_time=0.565 [gpu11] 2022-06-04 00:29:50,959 (trainer:672) INFO: 6epoch:train:951-1425batch: iter_time=1.455e-04, forward_time=0.049, loss_ctc=132.065, loss_transducer=69.696, loss=27.329, backward_time=0.045, optim_step_time=0.032, optim0_lr0=7.304e-04, train_time=0.559 [gpu11] 2022-06-04 00:30:58,969 (trainer:672) INFO: 6epoch:train:1426-1900batch: iter_time=1.433e-04, forward_time=0.050, loss_ctc=130.711, loss_transducer=68.969, loss=27.046, backward_time=0.045, optim_step_time=0.032, optim0_lr0=7.375e-04, train_time=0.572 [gpu11] 2022-06-04 00:32:06,673 (trainer:672) INFO: 6epoch:train:1901-2375batch: iter_time=1.507e-04, forward_time=0.050, loss_ctc=125.643, loss_transducer=65.430, loss=25.781, backward_time=0.045, optim_step_time=0.032, optim0_lr0=7.446e-04, train_time=0.571 [gpu11] 2022-06-04 00:33:13,209 (trainer:672) INFO: 6epoch:train:2376-2850batch: iter_time=1.569e-04, forward_time=0.049, loss_ctc=130.407, loss_transducer=68.291, loss=26.853, backward_time=0.044, optim_step_time=0.032, optim0_lr0=7.517e-04, train_time=0.560 [gpu11] 2022-06-04 00:34:18,711 (trainer:672) INFO: 6epoch:train:2851-3325batch: iter_time=1.437e-04, forward_time=0.048, loss_ctc=126.295, loss_transducer=66.653, loss=26.135, backward_time=0.044, optim_step_time=0.032, optim0_lr0=7.589e-04, train_time=0.551 [gpu11] 2022-06-04 00:35:25,358 (trainer:672) INFO: 6epoch:train:3326-3800batch: iter_time=1.429e-04, forward_time=0.049, loss_ctc=129.053, loss_transducer=67.887, loss=26.651, backward_time=0.045, optim_step_time=0.032, optim0_lr0=7.660e-04, train_time=0.561 [gpu11] 2022-06-04 00:36:31,928 (trainer:672) INFO: 6epoch:train:3801-4275batch: iter_time=1.484e-04, forward_time=0.049, loss_ctc=129.117, loss_transducer=67.681, loss=26.604, backward_time=0.045, optim_step_time=0.032, optim0_lr0=7.731e-04, train_time=0.561 [gpu11] 2022-06-04 00:37:39,151 (trainer:672) INFO: 6epoch:train:4276-4750batch: iter_time=1.527e-04, forward_time=0.049, loss_ctc=123.238, loss_transducer=63.885, loss=25.214, backward_time=0.045, optim_step_time=0.032, optim0_lr0=7.802e-04, train_time=0.566 [gpu11] 2022-06-04 00:38:45,199 (trainer:672) INFO: 6epoch:train:4751-5225batch: iter_time=1.538e-04, forward_time=0.049, loss_ctc=128.070, loss_transducer=67.036, loss=26.364, backward_time=0.044, optim_step_time=0.032, optim0_lr0=7.874e-04, train_time=0.556 [gpu11] 2022-06-04 00:39:51,627 (trainer:672) INFO: 6epoch:train:5226-5700batch: iter_time=1.539e-04, forward_time=0.049, loss_ctc=128.205, loss_transducer=67.124, loss=26.397, backward_time=0.045, optim_step_time=0.032, optim0_lr0=7.945e-04, train_time=0.559 [gpu11] 2022-06-04 00:40:58,952 (trainer:672) INFO: 6epoch:train:5701-6175batch: iter_time=1.539e-04, forward_time=0.049, loss_ctc=125.392, loss_transducer=65.177, loss=25.699, backward_time=0.045, optim_step_time=0.032, optim0_lr0=8.016e-04, train_time=0.567 [gpu11] 2022-06-04 00:42:06,319 (trainer:672) INFO: 6epoch:train:6176-6650batch: iter_time=1.727e-04, forward_time=0.050, loss_ctc=126.673, loss_transducer=66.500, loss=26.125, backward_time=0.046, optim_step_time=0.032, optim0_lr0=8.087e-04, train_time=0.567 [gpu11] 2022-06-04 00:43:12,912 (trainer:672) INFO: 6epoch:train:6651-7125batch: iter_time=1.528e-04, forward_time=0.049, loss_ctc=126.119, loss_transducer=65.130, loss=25.742, backward_time=0.046, optim_step_time=0.032, optim0_lr0=8.159e-04, train_time=0.560 [gpu11] 2022-06-04 00:44:20,646 (trainer:672) INFO: 6epoch:train:7126-7600batch: iter_time=1.768e-04, forward_time=0.050, loss_ctc=124.915, loss_transducer=64.554, loss=25.507, backward_time=0.046, optim_step_time=0.032, optim0_lr0=8.230e-04, train_time=0.570 [gpu11] 2022-06-04 00:45:27,899 (trainer:672) INFO: 6epoch:train:7601-8075batch: iter_time=1.731e-04, forward_time=0.050, loss_ctc=124.088, loss_transducer=64.170, loss=25.349, backward_time=0.047, optim_step_time=0.032, optim0_lr0=8.301e-04, train_time=0.567 [gpu11] 2022-06-04 00:46:34,319 (trainer:672) INFO: 6epoch:train:8076-8550batch: iter_time=1.660e-04, forward_time=0.049, loss_ctc=127.670, loss_transducer=65.423, loss=25.931, backward_time=0.046, optim_step_time=0.032, optim0_lr0=8.372e-04, train_time=0.559 [gpu11] 2022-06-04 00:47:41,236 (trainer:672) INFO: 6epoch:train:8551-9025batch: iter_time=1.644e-04, forward_time=0.049, loss_ctc=122.770, loss_transducer=62.752, loss=24.896, backward_time=0.046, optim_step_time=0.032, optim0_lr0=8.444e-04, train_time=0.563 [gpu11] 2022-06-04 00:48:48,068 (trainer:672) INFO: 6epoch:train:9026-9500batch: iter_time=1.725e-04, forward_time=0.049, loss_ctc=125.603, loss_transducer=64.831, loss=25.628, backward_time=0.046, optim_step_time=0.032, optim0_lr0=8.515e-04, train_time=0.562 [gpu11] 2022-06-04 00:49:03,275 (trainer:328) INFO: 6epoch results: [train] iter_time=1.817e-04, forward_time=0.049, loss_ctc=127.239, loss_transducer=66.357, loss=26.132, backward_time=0.045, optim_step_time=0.032, optim0_lr0=7.838e-04, train_time=0.563, time=22 minutes and 18.04 seconds, total_count=57018, gpu_max_cached_mem_GB=15.092, [valid] loss_ctc=162.256, cer_ctc=0.971, loss_transducer=76.968, cer_transducer=nan, wer_transducer=nan, loss=125.644, time=14.48 seconds, total_count=1686, gpu_max_cached_mem_GB=15.092 [gpu11] 2022-06-04 00:49:06,259 (trainer:376) INFO: The best model has been updated: valid.loss [gpu11] 2022-06-04 00:49:06,286 (trainer:430) INFO: The model files were removed: exp/asr_IndEng188h-rnnt-600bpe/3epoch.pth [gpu11] 2022-06-04 00:49:06,286 (trainer:262) INFO: 7/20epoch started. Estimated time to finish: 5 hours, 16 minutes and 59.93 seconds [gpu11] 2022-06-04 00:50:11,980 (trainer:672) INFO: 7epoch:train:1-475batch: iter_time=6.362e-04, forward_time=0.048, loss_ctc=124.777, loss_transducer=62.352, loss=24.946, backward_time=0.044, optim_step_time=0.032, optim0_lr0=8.586e-04, train_time=0.553 [gpu11] 2022-06-04 00:51:19,069 (trainer:672) INFO: 7epoch:train:476-950batch: iter_time=1.553e-04, forward_time=0.049, loss_ctc=131.370, loss_transducer=65.608, loss=26.255, backward_time=0.046, optim_step_time=0.032, optim0_lr0=8.657e-04, train_time=0.565 [gpu11] 2022-06-04 00:52:26,032 (trainer:672) INFO: 7epoch:train:951-1425batch: iter_time=1.586e-04, forward_time=0.049, loss_ctc=127.003, loss_transducer=62.574, loss=25.169, backward_time=0.045, optim_step_time=0.032, optim0_lr0=8.729e-04, train_time=0.563 [gpu11] 2022-06-04 00:53:32,067 (trainer:672) INFO: 7epoch:train:1426-1900batch: iter_time=1.574e-04, forward_time=0.048, loss_ctc=124.489, loss_transducer=61.889, loss=24.809, backward_time=0.045, optim_step_time=0.032, optim0_lr0=8.800e-04, train_time=0.555 [gpu11] 2022-06-04 00:54:38,478 (trainer:672) INFO: 7epoch:train:1901-2375batch: iter_time=1.501e-04, forward_time=0.049, loss_ctc=127.680, loss_transducer=64.303, loss=25.652, backward_time=0.045, optim_step_time=0.032, optim0_lr0=8.871e-04, train_time=0.559 [gpu11] 2022-06-04 00:55:44,987 (trainer:672) INFO: 7epoch:train:2376-2850batch: iter_time=1.588e-04, forward_time=0.049, loss_ctc=124.907, loss_transducer=63.278, loss=25.188, backward_time=0.045, optim_step_time=0.032, optim0_lr0=8.942e-04, train_time=0.560 [gpu11] 2022-06-04 00:56:51,252 (trainer:672) INFO: 7epoch:train:2851-3325batch: iter_time=1.607e-04, forward_time=0.049, loss_ctc=126.298, loss_transducer=64.588, loss=25.619, backward_time=0.045, optim_step_time=0.032, optim0_lr0=9.014e-04, train_time=0.557 [gpu11] 2022-06-04 00:57:58,244 (trainer:672) INFO: 7epoch:train:3326-3800batch: iter_time=1.594e-04, forward_time=0.049, loss_ctc=126.156, loss_transducer=65.461, loss=25.827, backward_time=0.046, optim_step_time=0.032, optim0_lr0=9.085e-04, train_time=0.564 [gpu11] 2022-06-04 00:59:04,658 (trainer:672) INFO: 7epoch:train:3801-4275batch: iter_time=1.668e-04, forward_time=0.049, loss_ctc=127.488, loss_transducer=65.691, loss=25.984, backward_time=0.045, optim_step_time=0.032, optim0_lr0=9.156e-04, train_time=0.560 [gpu11] 2022-06-04 01:00:11,064 (trainer:672) INFO: 7epoch:train:4276-4750batch: iter_time=1.637e-04, forward_time=0.049, loss_ctc=123.920, loss_transducer=63.990, loss=25.292, backward_time=0.045, optim_step_time=0.032, optim0_lr0=9.227e-04, train_time=0.558 [gpu11] 2022-06-04 01:01:18,501 (trainer:672) INFO: 7epoch:train:4751-5225batch: iter_time=1.560e-04, forward_time=0.051, loss_ctc=127.470, loss_transducer=66.005, loss=26.062, backward_time=0.046, optim_step_time=0.032, optim0_lr0=9.299e-04, train_time=0.568 [gpu11] 2022-06-04 01:02:26,025 (trainer:672) INFO: 7epoch:train:5226-5700batch: iter_time=1.536e-04, forward_time=0.051, loss_ctc=128.245, loss_transducer=66.823, loss=26.324, backward_time=0.046, optim_step_time=0.032, optim0_lr0=9.370e-04, train_time=0.568 [gpu11] 2022-06-04 01:03:32,889 (trainer:672) INFO: 7epoch:train:5701-6175batch: iter_time=1.483e-04, forward_time=0.050, loss_ctc=129.016, loss_transducer=67.440, loss=26.536, backward_time=0.046, optim_step_time=0.032, optim0_lr0=9.441e-04, train_time=0.563 [gpu11] 2022-06-04 01:04:40,409 (trainer:672) INFO: 7epoch:train:6176-6650batch: iter_time=1.518e-04, forward_time=0.050, loss_ctc=123.654, loss_transducer=63.399, loss=25.124, backward_time=0.046, optim_step_time=0.032, optim0_lr0=9.512e-04, train_time=0.569 [gpu11] 2022-06-04 01:05:46,958 (trainer:672) INFO: 7epoch:train:6651-7125batch: iter_time=1.578e-04, forward_time=0.050, loss_ctc=126.850, loss_transducer=66.582, loss=26.159, backward_time=0.046, optim_step_time=0.032, optim0_lr0=9.584e-04, train_time=0.560 [gpu11] 2022-06-04 01:06:54,868 (trainer:672) INFO: 7epoch:train:7126-7600batch: iter_time=1.780e-04, forward_time=0.050, loss_ctc=125.325, loss_transducer=64.570, loss=25.542, backward_time=0.046, optim_step_time=0.032, optim0_lr0=9.655e-04, train_time=0.571 [gpu11] 2022-06-04 01:08:01,635 (trainer:672) INFO: 7epoch:train:7601-8075batch: iter_time=1.804e-04, forward_time=0.049, loss_ctc=127.693, loss_transducer=65.742, loss=26.013, backward_time=0.045, optim_step_time=0.032, optim0_lr0=9.726e-04, train_time=0.562 [gpu11] 2022-06-04 01:09:08,837 (trainer:672) INFO: 7epoch:train:8076-8550batch: iter_time=1.777e-04, forward_time=0.049, loss_ctc=131.192, loss_transducer=68.315, loss=26.918, backward_time=0.046, optim_step_time=0.032, optim0_lr0=9.797e-04, train_time=0.566 [gpu11] 2022-06-04 01:10:16,481 (trainer:672) INFO: 7epoch:train:8551-9025batch: iter_time=1.730e-04, forward_time=0.050, loss_ctc=131.575, loss_transducer=69.177, loss=27.162, backward_time=0.045, optim_step_time=0.032, optim0_lr0=9.869e-04, train_time=0.569 [gpu11] 2022-06-04 01:11:24,171 (trainer:672) INFO: 7epoch:train:9026-9500batch: iter_time=1.616e-04, forward_time=0.050, loss_ctc=129.670, loss_transducer=67.171, loss=26.518, backward_time=0.045, optim_step_time=0.032, optim0_lr0=9.940e-04, train_time=0.569 [gpu11] 2022-06-04 01:11:39,450 (trainer:328) INFO: 7epoch results: [train] iter_time=1.853e-04, forward_time=0.049, loss_ctc=127.190, loss_transducer=65.226, loss=25.846, backward_time=0.046, optim_step_time=0.032, optim0_lr0=9.263e-04, train_time=0.563, time=22 minutes and 18.62 seconds, total_count=66521, gpu_max_cached_mem_GB=15.092, [valid] loss_ctc=159.069, cer_ctc=0.973, loss_transducer=77.188, cer_transducer=nan, wer_transducer=nan, loss=124.909, time=14.54 seconds, total_count=1967, gpu_max_cached_mem_GB=15.092 [gpu11] 2022-06-04 01:11:42,461 (trainer:376) INFO: The best model has been updated: valid.loss [gpu11] 2022-06-04 01:11:42,491 (trainer:430) INFO: The model files were removed: exp/asr_IndEng188h-rnnt-600bpe/4epoch.pth [gpu11] 2022-06-04 01:11:42,491 (trainer:262) INFO: 8/20epoch started. Estimated time to finish: 4 hours, 54 minutes and 16.98 seconds [gpu11] 2022-06-04 01:12:49,630 (trainer:672) INFO: 8epoch:train:1-475batch: iter_time=6.314e-04, forward_time=0.050, loss_ctc=128.181, loss_transducer=65.899, loss=26.088, backward_time=0.046, optim_step_time=0.032, optim0_lr0=0.001, train_time=0.566 [gpu11] 2022-06-04 01:13:56,522 (trainer:672) INFO: 8epoch:train:476-950batch: iter_time=1.637e-04, forward_time=0.049, loss_ctc=131.430, loss_transducer=67.147, loss=26.644, backward_time=0.046, optim_step_time=0.032, optim0_lr0=0.001, train_time=0.563 [gpu11] 2022-06-04 01:15:03,678 (trainer:672) INFO: 8epoch:train:951-1425batch: iter_time=1.843e-04, forward_time=0.050, loss_ctc=126.038, loss_transducer=63.492, loss=25.326, backward_time=0.046, optim_step_time=0.032, optim0_lr0=0.001, train_time=0.565 [gpu11] 2022-06-04 01:16:11,300 (trainer:672) INFO: 8epoch:train:1426-1900batch: iter_time=1.925e-04, forward_time=0.050, loss_ctc=129.030, loss_transducer=65.671, loss=26.095, backward_time=0.047, optim_step_time=0.032, optim0_lr0=0.001, train_time=0.569 [gpu11] 2022-06-04 01:17:18,867 (trainer:672) INFO: 8epoch:train:1901-2375batch: iter_time=1.902e-04, forward_time=0.050, loss_ctc=128.865, loss_transducer=64.176, loss=25.709, backward_time=0.046, optim_step_time=0.032, optim0_lr0=0.001, train_time=0.569 [gpu11] 2022-06-04 01:18:26,909 (trainer:672) INFO: 8epoch:train:2376-2850batch: iter_time=1.914e-04, forward_time=0.051, loss_ctc=129.374, loss_transducer=64.556, loss=25.842, backward_time=0.048, optim_step_time=0.033, optim0_lr0=0.001, train_time=0.573 [gpu11] 2022-06-04 01:19:35,797 (trainer:672) INFO: 8epoch:train:2851-3325batch: iter_time=1.713e-04, forward_time=0.051, loss_ctc=129.699, loss_transducer=65.699, loss=26.152, backward_time=0.050, optim_step_time=0.033, optim0_lr0=0.001, train_time=0.579 [gpu11] 2022-06-04 01:20:43,676 (trainer:672) INFO: 8epoch:train:3326-3800batch: iter_time=1.671e-04, forward_time=0.050, loss_ctc=126.771, loss_transducer=63.183, loss=25.304, backward_time=0.050, optim_step_time=0.033, optim0_lr0=0.001, train_time=0.571 [gpu11] 2022-06-04 01:21:51,631 (trainer:672) INFO: 8epoch:train:3801-4275batch: iter_time=1.666e-04, forward_time=0.050, loss_ctc=128.613, loss_transducer=64.796, loss=25.845, backward_time=0.048, optim_step_time=0.032, optim0_lr0=0.001, train_time=0.573 [gpu11] 2022-06-04 01:22:59,001 (trainer:672) INFO: 8epoch:train:4276-4750batch: iter_time=2.105e-04, forward_time=0.050, loss_ctc=126.160, loss_transducer=63.276, loss=25.281, backward_time=0.048, optim_step_time=0.032, optim0_lr0=0.001, train_time=0.567 [gpu11] 2022-06-04 01:24:06,789 (trainer:672) INFO: 8epoch:train:4751-5225batch: iter_time=2.098e-04, forward_time=0.050, loss_ctc=125.453, loss_transducer=63.521, loss=25.289, backward_time=0.047, optim_step_time=0.032, optim0_lr0=0.001, train_time=0.570 [gpu11] 2022-06-04 01:25:14,867 (trainer:672) INFO: 8epoch:train:5226-5700batch: iter_time=1.747e-04, forward_time=0.050, loss_ctc=127.420, loss_transducer=63.561, loss=25.447, backward_time=0.049, optim_step_time=0.033, optim0_lr0=0.001, train_time=0.573 [gpu11] 2022-06-04 01:26:23,142 (trainer:672) INFO: 8epoch:train:5701-6175batch: iter_time=1.724e-04, forward_time=0.051, loss_ctc=125.878, loss_transducer=62.614, loss=25.094, backward_time=0.049, optim_step_time=0.033, optim0_lr0=0.001, train_time=0.575 [gpu11] 2022-06-04 01:27:31,536 (trainer:672) INFO: 8epoch:train:6176-6650batch: iter_time=1.623e-04, forward_time=0.051, loss_ctc=125.890, loss_transducer=63.373, loss=25.285, backward_time=0.049, optim_step_time=0.033, optim0_lr0=0.001, train_time=0.576 [gpu11] 2022-06-04 01:28:39,338 (trainer:672) INFO: 8epoch:train:6651-7125batch: iter_time=1.619e-04, forward_time=0.051, loss_ctc=130.866, loss_transducer=66.876, loss=26.534, backward_time=0.048, optim_step_time=0.033, optim0_lr0=0.001, train_time=0.570 [gpu11] 2022-06-04 01:29:46,379 (trainer:672) INFO: 8epoch:train:7126-7600batch: iter_time=1.786e-04, forward_time=0.050, loss_ctc=122.100, loss_transducer=62.189, loss=24.705, backward_time=0.047, optim_step_time=0.033, optim0_lr0=0.001, train_time=0.564 [gpu11] 2022-06-04 01:30:53,599 (trainer:672) INFO: 8epoch:train:7601-8075batch: iter_time=1.641e-04, forward_time=0.050, loss_ctc=124.890, loss_transducer=63.916, loss=25.346, backward_time=0.049, optim_step_time=0.033, optim0_lr0=0.001, train_time=0.567 [gpu11] 2022-06-04 01:32:02,339 (trainer:672) INFO: 8epoch:train:8076-8550batch: iter_time=1.803e-04, forward_time=0.051, loss_ctc=130.983, loss_transducer=67.926, loss=26.805, backward_time=0.048, optim_step_time=0.033, optim0_lr0=0.001, train_time=0.578 [gpu11] 2022-06-04 01:33:11,301 (trainer:672) INFO: 8epoch:train:8551-9025batch: iter_time=1.925e-04, forward_time=0.052, loss_ctc=124.492, loss_transducer=62.932, loss=25.070, backward_time=0.047, optim_step_time=0.032, optim0_lr0=0.001, train_time=0.580 [gpu11] 2022-06-04 01:34:19,244 (trainer:672) INFO: 8epoch:train:9026-9500batch: iter_time=1.867e-04, forward_time=0.052, loss_ctc=122.171, loss_transducer=62.251, loss=24.726, backward_time=0.046, optim_step_time=0.032, optim0_lr0=0.001, train_time=0.572 [gpu11] 2022-06-04 01:34:34,723 (trainer:328) INFO: 8epoch results: [train] iter_time=2.026e-04, forward_time=0.050, loss_ctc=127.186, loss_transducer=64.339, loss=25.624, backward_time=0.048, optim_step_time=0.032, optim0_lr0=0.001, train_time=0.571, time=22 minutes and 37.52 seconds, total_count=76024, gpu_max_cached_mem_GB=15.092, [valid] loss_ctc=167.400, cer_ctc=0.975, loss_transducer=77.063, cer_transducer=nan, wer_transducer=nan, loss=127.283, time=14.71 seconds, total_count=2248, gpu_max_cached_mem_GB=15.092 [gpu11] 2022-06-04 01:34:37,724 (trainer:374) INFO: There are no improvements in this epoch [gpu11] 2022-06-04 01:34:37,752 (trainer:430) INFO: The model files were removed: exp/asr_IndEng188h-rnnt-600bpe/5epoch.pth [gpu11] 2022-06-04 01:34:37,752 (trainer:262) INFO: 9/20epoch started. Estimated time to finish: 4 hours, 32 minutes and 4.3 seconds [gpu11] 2022-06-04 01:35:44,627 (trainer:672) INFO: 9epoch:train:1-475batch: iter_time=5.877e-04, forward_time=0.049, loss_ctc=123.999, loss_transducer=61.060, loss=24.565, backward_time=0.047, optim_step_time=0.032, optim0_lr0=0.001, train_time=0.563 [gpu11] 2022-06-04 01:36:53,233 (trainer:672) INFO: 9epoch:train:476-950batch: iter_time=1.686e-04, forward_time=0.051, loss_ctc=132.667, loss_transducer=66.018, loss=26.454, backward_time=0.048, optim_step_time=0.032, optim0_lr0=0.001, train_time=0.577 [gpu11] 2022-06-04 01:38:00,471 (trainer:672) INFO: 9epoch:train:951-1425batch: iter_time=1.687e-04, forward_time=0.050, loss_ctc=125.593, loss_transducer=62.097, loss=24.944, backward_time=0.048, optim_step_time=0.032, optim0_lr0=0.001, train_time=0.566 [gpu11] 2022-06-04 01:39:08,204 (trainer:672) INFO: 9epoch:train:1426-1900batch: iter_time=1.693e-04, forward_time=0.050, loss_ctc=129.433, loss_transducer=65.255, loss=26.021, backward_time=0.047, optim_step_time=0.032, optim0_lr0=0.001, train_time=0.570 [gpu11] 2022-06-04 01:40:15,397 (trainer:672) INFO: 9epoch:train:1901-2375batch: iter_time=1.732e-04, forward_time=0.050, loss_ctc=124.605, loss_transducer=63.131, loss=25.128, backward_time=0.047, optim_step_time=0.032, optim0_lr0=0.001, train_time=0.566 [gpu11] 2022-06-04 01:41:22,162 (trainer:672) INFO: 9epoch:train:2376-2850batch: iter_time=1.601e-04, forward_time=0.050, loss_ctc=126.018, loss_transducer=64.988, loss=25.698, backward_time=0.046, optim_step_time=0.032, optim0_lr0=0.001, train_time=0.562 [gpu11] 2022-06-04 01:42:28,789 (trainer:672) INFO: 9epoch:train:2851-3325batch: iter_time=1.569e-04, forward_time=0.050, loss_ctc=124.082, loss_transducer=63.829, loss=25.263, backward_time=0.046, optim_step_time=0.032, optim0_lr0=0.001, train_time=0.561 [gpu11] 2022-06-04 01:43:36,324 (trainer:672) INFO: 9epoch:train:3326-3800batch: iter_time=1.523e-04, forward_time=0.050, loss_ctc=126.371, loss_transducer=65.020, loss=25.733, backward_time=0.047, optim_step_time=0.032, optim0_lr0=0.001, train_time=0.568 [gpu11] 2022-06-04 01:44:43,559 (trainer:672) INFO: 9epoch:train:3801-4275batch: iter_time=1.546e-04, forward_time=0.050, loss_ctc=123.381, loss_transducer=62.900, loss=24.979, backward_time=0.047, optim_step_time=0.032, optim0_lr0=0.001, train_time=0.566 [gpu11] 2022-06-04 01:45:51,883 (trainer:672) INFO: 9epoch:train:4276-4750batch: iter_time=1.512e-04, forward_time=0.051, loss_ctc=128.310, loss_transducer=68.078, loss=26.643, backward_time=0.048, optim_step_time=0.033, optim0_lr0=0.001, train_time=0.575 [gpu11] 2022-06-04 01:46:59,651 (trainer:672) INFO: 9epoch:train:4751-5225batch: iter_time=1.622e-04, forward_time=0.051, loss_ctc=133.070, loss_transducer=70.847, loss=27.692, backward_time=0.047, optim_step_time=0.033, optim0_lr0=0.001, train_time=0.571 [gpu11] 2022-06-04 01:48:07,041 (trainer:672) INFO: 9epoch:train:5226-5700batch: iter_time=1.655e-04, forward_time=0.050, loss_ctc=127.125, loss_transducer=67.631, loss=26.442, backward_time=0.046, optim_step_time=0.033, optim0_lr0=0.001, train_time=0.567 [gpu11] 2022-06-04 01:49:15,349 (trainer:672) INFO: 9epoch:train:5701-6175batch: iter_time=1.754e-04, forward_time=0.051, loss_ctc=124.078, loss_transducer=63.510, loss=25.183, backward_time=0.046, optim_step_time=0.033, optim0_lr0=0.001, train_time=0.576 [gpu11] 2022-06-04 01:50:23,234 (trainer:672) INFO: 9epoch:train:6176-6650batch: iter_time=1.683e-04, forward_time=0.051, loss_ctc=126.677, loss_transducer=66.047, loss=26.013, backward_time=0.047, optim_step_time=0.032, optim0_lr0=0.001, train_time=0.571 [gpu11] 2022-06-04 01:51:31,398 (trainer:672) INFO: 9epoch:train:6651-7125batch: iter_time=1.579e-04, forward_time=0.051, loss_ctc=128.998, loss_transducer=67.121, loss=26.455, backward_time=0.047, optim_step_time=0.032, optim0_lr0=0.001, train_time=0.574 [gpu11] 2022-06-04 01:52:38,788 (trainer:672) INFO: 9epoch:train:7126-7600batch: iter_time=1.553e-04, forward_time=0.050, loss_ctc=129.852, loss_transducer=67.259, loss=26.554, backward_time=0.046, optim_step_time=0.032, optim0_lr0=0.001, train_time=0.567 [gpu11] 2022-06-04 01:53:46,299 (trainer:672) INFO: 9epoch:train:7601-8075batch: iter_time=1.550e-04, forward_time=0.051, loss_ctc=129.863, loss_transducer=67.746, loss=26.676, backward_time=0.046, optim_step_time=0.032, optim0_lr0=0.001, train_time=0.569 [gpu11] 2022-06-04 01:54:52,868 (trainer:672) INFO: 9epoch:train:8076-8550batch: iter_time=1.638e-04, forward_time=0.050, loss_ctc=128.167, loss_transducer=65.905, loss=26.089, backward_time=0.046, optim_step_time=0.032, optim0_lr0=0.001, train_time=0.561 [gpu11] 2022-06-04 01:55:59,856 (trainer:672) INFO: 9epoch:train:8551-9025batch: iter_time=1.666e-04, forward_time=0.050, loss_ctc=128.318, loss_transducer=67.096, loss=26.398, backward_time=0.046, optim_step_time=0.032, optim0_lr0=0.001, train_time=0.564 [gpu11] 2022-06-04 01:57:06,507 (trainer:672) INFO: 9epoch:train:9026-9500batch: iter_time=1.593e-04, forward_time=0.049, loss_ctc=130.923, loss_transducer=68.549, loss=26.956, backward_time=0.045, optim_step_time=0.031, optim0_lr0=0.001, train_time=0.561 [gpu11] 2022-06-04 01:57:21,679 (trainer:328) INFO: 9epoch results: [train] iter_time=1.836e-04, forward_time=0.050, loss_ctc=127.522, loss_transducer=65.666, loss=25.981, backward_time=0.047, optim_step_time=0.032, optim0_lr0=0.001, train_time=0.568, time=22 minutes and 29.47 seconds, total_count=85527, gpu_max_cached_mem_GB=15.092, [valid] loss_ctc=161.872, cer_ctc=0.968, loss_transducer=77.954, cer_transducer=nan, wer_transducer=nan, loss=126.515, time=14.46 seconds, total_count=2529, gpu_max_cached_mem_GB=15.092 [gpu11] 2022-06-04 01:57:24,795 (trainer:374) INFO: There are no improvements in this epoch [gpu11] 2022-06-04 01:57:24,823 (trainer:430) INFO: The model files were removed: exp/asr_IndEng188h-rnnt-600bpe/8epoch.pth [gpu11] 2022-06-04 01:57:24,823 (trainer:262) INFO: 10/20epoch started. Estimated time to finish: 4 hours, 9 minutes and 32.14 seconds [gpu11] 2022-06-04 01:58:32,368 (trainer:672) INFO: 10epoch:train:1-475batch: iter_time=5.756e-04, forward_time=0.050, loss_ctc=134.129, loss_transducer=67.611, loss=26.962, backward_time=0.046, optim_step_time=0.032, optim0_lr0=0.001, train_time=0.569 [gpu11] 2022-06-04 01:59:39,530 (trainer:672) INFO: 10epoch:train:476-950batch: iter_time=1.527e-04, forward_time=0.050, loss_ctc=128.870, loss_transducer=65.972, loss=26.158, backward_time=0.046, optim_step_time=0.032, optim0_lr0=0.001, train_time=0.565 [gpu11] 2022-06-04 02:00:47,350 (trainer:672) INFO: 10epoch:train:951-1425batch: iter_time=1.572e-04, forward_time=0.050, loss_ctc=128.164, loss_transducer=65.049, loss=25.875, backward_time=0.048, optim_step_time=0.033, optim0_lr0=0.001, train_time=0.571 [gpu11] 2022-06-04 02:01:55,060 (trainer:672) INFO: 10epoch:train:1426-1900batch: iter_time=1.662e-04, forward_time=0.051, loss_ctc=131.076, loss_transducer=66.265, loss=26.397, backward_time=0.047, optim_step_time=0.032, optim0_lr0=0.001, train_time=0.570 [gpu11] 2022-06-04 02:03:02,664 (trainer:672) INFO: 10epoch:train:1901-2375batch: iter_time=1.627e-04, forward_time=0.050, loss_ctc=129.466, loss_transducer=65.915, loss=26.189, backward_time=0.047, optim_step_time=0.032, optim0_lr0=0.001, train_time=0.570 [gpu11] 2022-06-04 02:04:09,405 (trainer:672) INFO: 10epoch:train:2376-2850batch: iter_time=1.682e-04, forward_time=0.049, loss_ctc=123.179, loss_transducer=60.969, loss=24.481, backward_time=0.046, optim_step_time=0.032, optim0_lr0=0.001, train_time=0.562 [gpu11] 2022-06-04 02:05:15,687 (trainer:672) INFO: 10epoch:train:2851-3325batch: iter_time=1.757e-04, forward_time=0.049, loss_ctc=126.439, loss_transducer=64.052, loss=25.496, backward_time=0.045, optim_step_time=0.032, optim0_lr0=0.001, train_time=0.558 [gpu11] 2022-06-04 02:06:24,080 (trainer:672) INFO: 10epoch:train:3326-3800batch: iter_time=1.798e-04, forward_time=0.051, loss_ctc=126.185, loss_transducer=64.484, loss=25.585, backward_time=0.047, optim_step_time=0.032, optim0_lr0=0.001, train_time=0.575 [gpu11] 2022-06-04 02:07:31,882 (trainer:672) INFO: 10epoch:train:3801-4275batch: iter_time=1.716e-04, forward_time=0.050, loss_ctc=126.647, loss_transducer=64.753, loss=25.687, backward_time=0.049, optim_step_time=0.032, optim0_lr0=0.001, train_time=0.571 [gpu11] 2022-06-04 02:08:39,711 (trainer:672) INFO: 10epoch:train:4276-4750batch: iter_time=1.720e-04, forward_time=0.050, loss_ctc=127.304, loss_transducer=66.110, loss=26.075, backward_time=0.048, optim_step_time=0.032, optim0_lr0=0.001, train_time=0.571 [gpu11] 2022-06-04 02:09:47,050 (trainer:672) INFO: 10epoch:train:4751-5225batch: iter_time=1.568e-04, forward_time=0.050, loss_ctc=126.258, loss_transducer=65.187, loss=25.766, backward_time=0.048, optim_step_time=0.032, optim0_lr0=0.001, train_time=0.567 [gpu11] 2022-06-04 02:10:53,449 (trainer:672) INFO: 10epoch:train:5226-5700batch: iter_time=1.562e-04, forward_time=0.049, loss_ctc=127.039, loss_transducer=66.130, loss=26.060, backward_time=0.046, optim_step_time=0.032, optim0_lr0=0.001, train_time=0.559 [gpu11] 2022-06-04 02:12:00,770 (trainer:672) INFO: 10epoch:train:5701-6175batch: iter_time=1.702e-04, forward_time=0.050, loss_ctc=125.442, loss_transducer=65.428, loss=25.765, backward_time=0.046, optim_step_time=0.033, optim0_lr0=0.001, train_time=0.567 [gpu11] 2022-06-04 02:13:06,453 (trainer:672) INFO: 10epoch:train:6176-6650batch: iter_time=1.576e-04, forward_time=0.049, loss_ctc=123.933, loss_transducer=64.550, loss=25.432, backward_time=0.045, optim_step_time=0.032, optim0_lr0=0.001, train_time=0.553 [gpu11] 2022-06-04 02:14:14,264 (trainer:672) INFO: 10epoch:train:6651-7125batch: iter_time=1.608e-04, forward_time=0.051, loss_ctc=131.786, loss_transducer=69.351, loss=27.222, backward_time=0.047, optim_step_time=0.032, optim0_lr0=0.001, train_time=0.571 [gpu11] 2022-06-04 02:15:21,679 (trainer:672) INFO: 10epoch:train:7126-7600batch: iter_time=1.734e-04, forward_time=0.050, loss_ctc=123.984, loss_transducer=64.032, loss=25.307, backward_time=0.049, optim_step_time=0.032, optim0_lr0=0.001, train_time=0.567 [gpu11] 2022-06-04 02:16:30,417 (trainer:672) INFO: 10epoch:train:7601-8075batch: iter_time=1.796e-04, forward_time=0.051, loss_ctc=129.128, loss_transducer=67.422, loss=26.540, backward_time=0.048, optim_step_time=0.033, optim0_lr0=0.001, train_time=0.579 [gpu11] 2022-06-04 02:17:38,599 (trainer:672) INFO: 10epoch:train:8076-8550batch: iter_time=1.568e-04, forward_time=0.050, loss_ctc=130.798, loss_transducer=67.967, loss=26.802, backward_time=0.048, optim_step_time=0.032, optim0_lr0=0.001, train_time=0.574 [gpu11] 2022-06-04 02:18:45,928 (trainer:672) INFO: 10epoch:train:8551-9025batch: iter_time=1.903e-04, forward_time=0.050, loss_ctc=124.325, loss_transducer=64.688, loss=25.496, backward_time=0.047, optim_step_time=0.033, optim0_lr0=0.001, train_time=0.567 [gpu11] 2022-06-04 02:19:55,392 (trainer:672) INFO: 10epoch:train:9026-9500batch: iter_time=2.014e-04, forward_time=0.053, loss_ctc=129.255, loss_transducer=67.331, loss=26.527, backward_time=0.049, optim_step_time=0.036, optim0_lr0=0.001, train_time=0.584 [gpu11] 2022-06-04 02:20:11,171 (trainer:328) INFO: 10epoch results: [train] iter_time=1.892e-04, forward_time=0.050, loss_ctc=127.655, loss_transducer=65.661, loss=25.989, backward_time=0.047, optim_step_time=0.032, optim0_lr0=0.001, train_time=0.568, time=22 minutes and 31.39 seconds, total_count=95030, gpu_max_cached_mem_GB=15.092, [valid] loss_ctc=160.643, cer_ctc=0.970, loss_transducer=77.788, cer_transducer=nan, wer_transducer=nan, loss=125.981, time=14.96 seconds, total_count=2810, gpu_max_cached_mem_GB=15.092 [gpu11] 2022-06-04 02:20:14,123 (trainer:374) INFO: There are no improvements in this epoch [gpu11] 2022-06-04 02:20:14,165 (trainer:430) INFO: The model files were removed: exp/asr_IndEng188h-rnnt-600bpe/9epoch.pth [gpu11] 2022-06-04 02:20:14,166 (trainer:262) INFO: 11/20epoch started. Estimated time to finish: 3 hours, 46 minutes and 59.28 seconds [gpu11] 2022-06-04 02:21:23,251 (trainer:672) INFO: 11epoch:train:1-475batch: iter_time=7.138e-04, forward_time=0.052, loss_ctc=131.578, loss_transducer=66.121, loss=26.398, backward_time=0.049, optim_step_time=0.035, optim0_lr0=0.001, train_time=0.582 [gpu11] 2022-06-04 02:22:32,477 (trainer:672) INFO: 11epoch:train:476-950batch: iter_time=1.969e-04, forward_time=0.052, loss_ctc=131.697, loss_transducer=65.647, loss=26.289, backward_time=0.049, optim_step_time=0.035, optim0_lr0=0.001, train_time=0.583 [gpu11] 2022-06-04 02:23:40,770 (trainer:672) INFO: 11epoch:train:951-1425batch: iter_time=2.096e-04, forward_time=0.051, loss_ctc=127.755, loss_transducer=63.546, loss=25.468, backward_time=0.047, optim_step_time=0.035, optim0_lr0=0.001, train_time=0.575 [gpu11] 2022-06-04 02:24:48,410 (trainer:672) INFO: 11epoch:train:1426-1900batch: iter_time=2.052e-04, forward_time=0.051, loss_ctc=125.819, loss_transducer=62.139, loss=24.971, backward_time=0.047, optim_step_time=0.035, optim0_lr0=0.001, train_time=0.569 [gpu11] 2022-06-04 02:25:56,562 (trainer:672) INFO: 11epoch:train:1901-2375batch: iter_time=2.037e-04, forward_time=0.051, loss_ctc=127.915, loss_transducer=63.208, loss=25.396, backward_time=0.047, optim_step_time=0.035, optim0_lr0=0.001, train_time=0.574 [gpu11] 2022-06-04 02:27:04,371 (trainer:672) INFO: 11epoch:train:2376-2850batch: iter_time=2.007e-04, forward_time=0.051, loss_ctc=124.372, loss_transducer=60.061, loss=24.343, backward_time=0.047, optim_step_time=0.035, optim0_lr0=0.001, train_time=0.571 [gpu11] 2022-06-04 02:28:13,113 (trainer:672) INFO: 11epoch:train:2851-3325batch: iter_time=1.973e-04, forward_time=0.051, loss_ctc=129.041, loss_transducer=64.917, loss=25.907, backward_time=0.047, optim_step_time=0.035, optim0_lr0=0.001, train_time=0.579 [gpu11] 2022-06-04 02:29:21,351 (trainer:672) INFO: 11epoch:train:3326-3800batch: iter_time=1.800e-04, forward_time=0.051, loss_ctc=127.979, loss_transducer=64.557, loss=25.738, backward_time=0.047, optim_step_time=0.035, optim0_lr0=0.001, train_time=0.574 [gpu11] 2022-06-04 02:30:30,085 (trainer:672) INFO: 11epoch:train:3801-4275batch: iter_time=2.157e-04, forward_time=0.052, loss_ctc=128.173, loss_transducer=64.209, loss=25.665, backward_time=0.048, optim_step_time=0.033, optim0_lr0=0.001, train_time=0.579 [gpu11] 2022-06-04 02:31:37,222 (trainer:672) INFO: 11epoch:train:4276-4750batch: iter_time=1.735e-04, forward_time=0.050, loss_ctc=125.969, loss_transducer=63.361, loss=25.288, backward_time=0.047, optim_step_time=0.032, optim0_lr0=0.001, train_time=0.565 [gpu11] 2022-06-04 02:32:45,278 (trainer:672) INFO: 11epoch:train:4751-5225batch: iter_time=1.760e-04, forward_time=0.051, loss_ctc=125.887, loss_transducer=64.998, loss=25.691, backward_time=0.048, optim_step_time=0.033, optim0_lr0=0.001, train_time=0.573 [gpu11] 2022-06-04 02:33:52,198 (trainer:672) INFO: 11epoch:train:5226-5700batch: iter_time=1.564e-04, forward_time=0.050, loss_ctc=126.406, loss_transducer=65.616, loss=25.884, backward_time=0.046, optim_step_time=0.032, optim0_lr0=0.001, train_time=0.563 [gpu11] 2022-06-04 02:34:59,837 (trainer:672) INFO: 11epoch:train:5701-6175batch: iter_time=1.439e-04, forward_time=0.051, loss_ctc=121.680, loss_transducer=62.123, loss=24.657, backward_time=0.046, optim_step_time=0.032, optim0_lr0=0.001, train_time=0.570 [gpu11] 2022-06-04 02:36:07,181 (trainer:672) INFO: 11epoch:train:6176-6650batch: iter_time=1.608e-04, forward_time=0.050, loss_ctc=127.602, loss_transducer=66.275, loss=26.139, backward_time=0.046, optim_step_time=0.032, optim0_lr0=0.001, train_time=0.567 [gpu11] 2022-06-04 02:37:13,583 (trainer:672) INFO: 11epoch:train:6651-7125batch: iter_time=1.527e-04, forward_time=0.049, loss_ctc=126.308, loss_transducer=65.721, loss=25.903, backward_time=0.046, optim_step_time=0.032, optim0_lr0=0.001, train_time=0.559 [gpu11] 2022-06-04 02:38:21,145 (trainer:672) INFO: 11epoch:train:7126-7600batch: iter_time=1.665e-04, forward_time=0.051, loss_ctc=133.697, loss_transducer=71.193, loss=27.826, backward_time=0.046, optim_step_time=0.032, optim0_lr0=0.001, train_time=0.568 [gpu11] 2022-06-04 02:39:28,398 (trainer:672) INFO: 11epoch:train:7601-8075batch: iter_time=1.777e-04, forward_time=0.051, loss_ctc=128.292, loss_transducer=68.045, loss=26.633, backward_time=0.046, optim_step_time=0.032, optim0_lr0=0.001, train_time=0.567 [gpu11] 2022-06-04 02:40:35,112 (trainer:672) INFO: 11epoch:train:8076-8550batch: iter_time=1.518e-04, forward_time=0.050, loss_ctc=126.570, loss_transducer=67.072, loss=26.261, backward_time=0.046, optim_step_time=0.032, optim0_lr0=0.001, train_time=0.562 [gpu11] 2022-06-04 02:41:41,702 (trainer:672) INFO: 11epoch:train:8551-9025batch: iter_time=1.508e-04, forward_time=0.049, loss_ctc=126.741, loss_transducer=66.666, loss=26.172, backward_time=0.046, optim_step_time=0.032, optim0_lr0=0.001, train_time=0.560 [gpu11] 2022-06-04 02:42:49,697 (trainer:672) INFO: 11epoch:train:9026-9500batch: iter_time=1.555e-04, forward_time=0.050, loss_ctc=129.578, loss_transducer=68.532, loss=26.851, backward_time=0.049, optim_step_time=0.032, optim0_lr0=0.001, train_time=0.572 [gpu11] 2022-06-04 02:43:04,822 (trainer:328) INFO: 11epoch results: [train] iter_time=2.044e-04, forward_time=0.051, loss_ctc=127.641, loss_transducer=65.190, loss=25.871, backward_time=0.047, optim_step_time=0.033, optim0_lr0=0.001, train_time=0.571, time=22 minutes and 36.22 seconds, total_count=104533, gpu_max_cached_mem_GB=15.092, [valid] loss_ctc=168.557, cer_ctc=0.972, loss_transducer=80.608, cer_transducer=nan, wer_transducer=nan, loss=131.175, time=14.43 seconds, total_count=3091, gpu_max_cached_mem_GB=15.092 [gpu11] 2022-06-04 02:43:07,651 (trainer:374) INFO: There are no improvements in this epoch [gpu11] 2022-06-04 02:43:07,651 (trainer:262) INFO: 12/20epoch started. Estimated time to finish: 3 hours, 24 minutes and 26.81 seconds [gpu11] 2022-06-04 02:44:14,469 (trainer:672) INFO: 12epoch:train:1-475batch: iter_time=6.544e-04, forward_time=0.050, loss_ctc=125.494, loss_transducer=64.888, loss=25.634, backward_time=0.045, optim_step_time=0.032, optim0_lr0=0.001, train_time=0.563 [gpu11] 2022-06-04 02:45:20,864 (trainer:672) INFO: 12epoch:train:476-950batch: iter_time=1.723e-04, forward_time=0.049, loss_ctc=121.958, loss_transducer=62.272, loss=24.715, backward_time=0.045, optim_step_time=0.032, optim0_lr0=0.001, train_time=0.559 [gpu11] 2022-06-04 02:46:26,705 (trainer:672) INFO: 12epoch:train:951-1425batch: iter_time=1.584e-04, forward_time=0.049, loss_ctc=128.521, loss_transducer=66.169, loss=26.181, backward_time=0.045, optim_step_time=0.032, optim0_lr0=0.001, train_time=0.554 [gpu11] 2022-06-04 02:47:34,076 (trainer:672) INFO: 12epoch:train:1426-1900batch: iter_time=1.639e-04, forward_time=0.049, loss_ctc=127.673, loss_transducer=65.358, loss=25.915, backward_time=0.045, optim_step_time=0.032, optim0_lr0=0.001, train_time=0.567 [gpu11] 2022-06-04 02:48:41,015 (trainer:672) INFO: 12epoch:train:1901-2375batch: iter_time=1.685e-04, forward_time=0.049, loss_ctc=126.348, loss_transducer=65.600, loss=25.876, backward_time=0.045, optim_step_time=0.032, optim0_lr0=0.001, train_time=0.564 [gpu11] 2022-06-04 02:49:47,584 (trainer:672) INFO: 12epoch:train:2376-2850batch: iter_time=1.619e-04, forward_time=0.049, loss_ctc=126.808, loss_transducer=65.136, loss=25.795, backward_time=0.045, optim_step_time=0.032, optim0_lr0=0.001, train_time=0.560 [gpu11] 2022-06-04 02:50:54,106 (trainer:672) INFO: 12epoch:train:2851-3325batch: iter_time=1.650e-04, forward_time=0.049, loss_ctc=129.070, loss_transducer=66.299, loss=26.255, backward_time=0.045, optim_step_time=0.032, optim0_lr0=0.001, train_time=0.560 [gpu11] 2022-06-04 02:52:00,157 (trainer:672) INFO: 12epoch:train:3326-3800batch: iter_time=1.662e-04, forward_time=0.048, loss_ctc=125.856, loss_transducer=65.109, loss=25.716, backward_time=0.044, optim_step_time=0.032, optim0_lr0=0.001, train_time=0.556 [gpu11] 2022-06-04 02:53:07,150 (trainer:672) INFO: 12epoch:train:3801-4275batch: iter_time=1.605e-04, forward_time=0.049, loss_ctc=128.909, loss_transducer=66.092, loss=26.191, backward_time=0.045, optim_step_time=0.032, optim0_lr0=0.001, train_time=0.564 [gpu11] 2022-06-04 02:54:13,693 (trainer:672) INFO: 12epoch:train:4276-4750batch: iter_time=1.582e-04, forward_time=0.049, loss_ctc=128.968, loss_transducer=67.449, loss=26.535, backward_time=0.045, optim_step_time=0.032, optim0_lr0=0.001, train_time=0.560 [gpu11] 2022-06-04 02:55:19,754 (trainer:672) INFO: 12epoch:train:4751-5225batch: iter_time=1.635e-04, forward_time=0.049, loss_ctc=127.810, loss_transducer=66.172, loss=26.129, backward_time=0.045, optim_step_time=0.032, optim0_lr0=0.001, train_time=0.556 [gpu11] 2022-06-04 02:56:26,535 (trainer:672) INFO: 12epoch:train:5226-5700batch: iter_time=1.628e-04, forward_time=0.049, loss_ctc=127.677, loss_transducer=65.666, loss=25.992, backward_time=0.045, optim_step_time=0.032, optim0_lr0=0.001, train_time=0.562 [gpu11] 2022-06-04 02:57:32,586 (trainer:672) INFO: 12epoch:train:5701-6175batch: iter_time=1.585e-04, forward_time=0.049, loss_ctc=123.725, loss_transducer=63.611, loss=25.182, backward_time=0.045, optim_step_time=0.032, optim0_lr0=0.001, train_time=0.556 [gpu11] 2022-06-04 02:58:39,700 (trainer:672) INFO: 12epoch:train:6176-6650batch: iter_time=1.521e-04, forward_time=0.049, loss_ctc=130.662, loss_transducer=66.989, loss=26.547, backward_time=0.045, optim_step_time=0.032, optim0_lr0=0.001, train_time=0.565 [gpu11] 2022-06-04 02:59:46,443 (trainer:672) INFO: 12epoch:train:6651-7125batch: iter_time=1.652e-04, forward_time=0.049, loss_ctc=129.828, loss_transducer=66.694, loss=26.411, backward_time=0.045, optim_step_time=0.032, optim0_lr0=0.001, train_time=0.562 [gpu11] 2022-06-04 03:00:52,735 (trainer:672) INFO: 12epoch:train:7126-7600batch: iter_time=1.651e-04, forward_time=0.049, loss_ctc=125.778, loss_transducer=64.617, loss=25.588, backward_time=0.045, optim_step_time=0.032, optim0_lr0=0.001, train_time=0.558 [gpu11] 2022-06-04 03:01:59,214 (trainer:672) INFO: 12epoch:train:7601-8075batch: iter_time=1.611e-04, forward_time=0.049, loss_ctc=131.392, loss_transducer=68.043, loss=26.865, backward_time=0.045, optim_step_time=0.032, optim0_lr0=0.001, train_time=0.560 [gpu11] 2022-06-04 03:03:05,808 (trainer:672) INFO: 12epoch:train:8076-8550batch: iter_time=1.647e-04, forward_time=0.049, loss_ctc=127.977, loss_transducer=64.861, loss=25.814, backward_time=0.045, optim_step_time=0.032, optim0_lr0=0.001, train_time=0.561 [gpu11] 2022-06-04 03:04:12,622 (trainer:672) INFO: 12epoch:train:8551-9025batch: iter_time=1.663e-04, forward_time=0.049, loss_ctc=128.778, loss_transducer=66.548, loss=26.295, backward_time=0.045, optim_step_time=0.032, optim0_lr0=0.001, train_time=0.562 [gpu11] 2022-06-04 03:05:19,607 (trainer:672) INFO: 12epoch:train:9026-9500batch: iter_time=1.677e-04, forward_time=0.049, loss_ctc=127.649, loss_transducer=65.024, loss=25.830, backward_time=0.045, optim_step_time=0.032, optim0_lr0=0.001, train_time=0.564 [gpu11] 2022-06-04 03:05:34,719 (trainer:328) INFO: 12epoch results: [train] iter_time=1.878e-04, forward_time=0.049, loss_ctc=127.519, loss_transducer=65.616, loss=25.968, backward_time=0.045, optim_step_time=0.032, optim0_lr0=0.001, train_time=0.561, time=22 minutes and 12.65 seconds, total_count=114036, gpu_max_cached_mem_GB=15.092, [valid] loss_ctc=164.960, cer_ctc=0.976, loss_transducer=77.149, cer_transducer=nan, wer_transducer=nan, loss=126.637, time=14.42 seconds, total_count=3372, gpu_max_cached_mem_GB=15.092 [gpu11] 2022-06-04 03:05:37,609 (trainer:374) INFO: There are no improvements in this epoch [gpu11] 2022-06-04 03:05:37,637 (trainer:430) INFO: The model files were removed: exp/asr_IndEng188h-rnnt-600bpe/11epoch.pth [gpu11] 2022-06-04 03:05:37,638 (trainer:262) INFO: 13/20epoch started. Estimated time to finish: 3 hours, 1 minute and 35.17 seconds [gpu11] 2022-06-04 03:06:43,926 (trainer:672) INFO: 13epoch:train:1-475batch: iter_time=6.352e-04, forward_time=0.049, loss_ctc=130.705, loss_transducer=65.333, loss=26.136, backward_time=0.045, optim_step_time=0.032, optim0_lr0=0.001, train_time=0.558 [gpu11] 2022-06-04 03:07:50,130 (trainer:672) INFO: 13epoch:train:476-950batch: iter_time=1.660e-04, forward_time=0.049, loss_ctc=130.095, loss_transducer=65.387, loss=26.104, backward_time=0.045, optim_step_time=0.032, optim0_lr0=0.001, train_time=0.558 [gpu11] 2022-06-04 03:08:55,813 (trainer:672) INFO: 13epoch:train:951-1425batch: iter_time=1.615e-04, forward_time=0.048, loss_ctc=127.366, loss_transducer=64.172, loss=25.595, backward_time=0.045, optim_step_time=0.032, optim0_lr0=0.001, train_time=0.553 [gpu11] 2022-06-04 03:10:02,154 (trainer:672) INFO: 13epoch:train:1426-1900batch: iter_time=1.527e-04, forward_time=0.049, loss_ctc=128.876, loss_transducer=65.212, loss=25.969, backward_time=0.045, optim_step_time=0.032, optim0_lr0=0.001, train_time=0.558 [gpu11] 2022-06-04 03:11:08,371 (trainer:672) INFO: 13epoch:train:1901-2375batch: iter_time=1.599e-04, forward_time=0.049, loss_ctc=123.969, loss_transducer=62.633, loss=24.956, backward_time=0.045, optim_step_time=0.032, optim0_lr0=0.001, train_time=0.558 [gpu11] 2022-06-04 03:12:15,186 (trainer:672) INFO: 13epoch:train:2376-2850batch: iter_time=1.594e-04, forward_time=0.049, loss_ctc=133.408, loss_transducer=68.387, loss=27.102, backward_time=0.045, optim_step_time=0.032, optim0_lr0=0.001, train_time=0.562 [gpu11] 2022-06-04 03:13:21,256 (trainer:672) INFO: 13epoch:train:2851-3325batch: iter_time=1.582e-04, forward_time=0.049, loss_ctc=126.730, loss_transducer=64.284, loss=25.576, backward_time=0.045, optim_step_time=0.032, optim0_lr0=0.001, train_time=0.556 [gpu11] 2022-06-04 03:14:27,868 (trainer:672) INFO: 13epoch:train:3326-3800batch: iter_time=1.553e-04, forward_time=0.049, loss_ctc=129.453, loss_transducer=65.800, loss=26.159, backward_time=0.045, optim_step_time=0.032, optim0_lr0=0.001, train_time=0.560 [gpu11] 2022-06-04 03:15:33,805 (trainer:672) INFO: 13epoch:train:3801-4275batch: iter_time=1.566e-04, forward_time=0.048, loss_ctc=125.269, loss_transducer=64.102, loss=25.421, backward_time=0.044, optim_step_time=0.032, optim0_lr0=0.001, train_time=0.555 [gpu11] 2022-06-04 03:16:40,460 (trainer:672) INFO: 13epoch:train:4276-4750batch: iter_time=1.626e-04, forward_time=0.049, loss_ctc=123.619, loss_transducer=62.831, loss=24.979, backward_time=0.045, optim_step_time=0.032, optim0_lr0=0.001, train_time=0.561 [gpu11] 2022-06-04 03:17:46,134 (trainer:672) INFO: 13epoch:train:4751-5225batch: iter_time=1.474e-04, forward_time=0.048, loss_ctc=129.202, loss_transducer=66.683, loss=26.361, backward_time=0.045, optim_step_time=0.032, optim0_lr0=0.001, train_time=0.553 [gpu11] 2022-06-04 03:18:52,912 (trainer:672) INFO: 13epoch:train:5226-5700batch: iter_time=1.546e-04, forward_time=0.049, loss_ctc=129.116, loss_transducer=66.379, loss=26.278, backward_time=0.045, optim_step_time=0.032, optim0_lr0=0.001, train_time=0.562 [gpu11] 2022-06-04 03:20:00,065 (trainer:672) INFO: 13epoch:train:5701-6175batch: iter_time=1.427e-04, forward_time=0.050, loss_ctc=131.755, loss_transducer=66.561, loss=26.522, backward_time=0.046, optim_step_time=0.033, optim0_lr0=0.001, train_time=0.566 [gpu11] 2022-06-04 03:21:07,403 (trainer:672) INFO: 13epoch:train:6176-6650batch: iter_time=1.479e-04, forward_time=0.050, loss_ctc=127.520, loss_transducer=65.417, loss=25.918, backward_time=0.046, optim_step_time=0.033, optim0_lr0=0.001, train_time=0.567 [gpu11] 2022-06-04 03:22:13,729 (trainer:672) INFO: 13epoch:train:6651-7125batch: iter_time=1.451e-04, forward_time=0.049, loss_ctc=123.289, loss_transducer=61.886, loss=24.718, backward_time=0.046, optim_step_time=0.033, optim0_lr0=0.001, train_time=0.558 [gpu11] 2022-06-04 03:23:20,646 (trainer:672) INFO: 13epoch:train:7126-7600batch: iter_time=1.528e-04, forward_time=0.050, loss_ctc=127.589, loss_transducer=64.326, loss=25.651, backward_time=0.046, optim_step_time=0.032, optim0_lr0=0.001, train_time=0.563 [gpu11] 2022-06-04 03:24:27,062 (trainer:672) INFO: 13epoch:train:7601-8075batch: iter_time=1.644e-04, forward_time=0.049, loss_ctc=123.227, loss_transducer=62.163, loss=24.783, backward_time=0.045, optim_step_time=0.032, optim0_lr0=0.001, train_time=0.559 [gpu11] 2022-06-04 03:25:33,506 (trainer:672) INFO: 13epoch:train:8076-8550batch: iter_time=1.631e-04, forward_time=0.049, loss_ctc=127.889, loss_transducer=64.519, loss=25.721, backward_time=0.045, optim_step_time=0.032, optim0_lr0=0.001, train_time=0.559 [gpu11] 2022-06-04 03:26:40,185 (trainer:672) INFO: 13epoch:train:8551-9025batch: iter_time=1.567e-04, forward_time=0.049, loss_ctc=128.791, loss_transducer=64.673, loss=25.827, backward_time=0.045, optim_step_time=0.032, optim0_lr0=0.001, train_time=0.561 [gpu11] 2022-06-04 03:27:47,038 (trainer:672) INFO: 13epoch:train:9026-9500batch: iter_time=1.601e-04, forward_time=0.049, loss_ctc=126.961, loss_transducer=63.626, loss=25.429, backward_time=0.045, optim_step_time=0.032, optim0_lr0=0.001, train_time=0.562 [gpu11] 2022-06-04 03:28:02,235 (trainer:328) INFO: 13epoch results: [train] iter_time=1.801e-04, forward_time=0.049, loss_ctc=127.706, loss_transducer=64.700, loss=25.753, backward_time=0.045, optim_step_time=0.032, optim0_lr0=0.001, train_time=0.560, time=22 minutes and 10.22 seconds, total_count=123539, gpu_max_cached_mem_GB=15.092, [valid] loss_ctc=170.642, cer_ctc=0.973, loss_transducer=77.396, cer_transducer=nan, wer_transducer=nan, loss=128.588, time=14.37 seconds, total_count=3653, gpu_max_cached_mem_GB=15.092 [gpu11] 2022-06-04 03:28:05,263 (trainer:374) INFO: There are no improvements in this epoch [gpu11] 2022-06-04 03:28:05,291 (trainer:430) INFO: The model files were removed: exp/asr_IndEng188h-rnnt-600bpe/12epoch.pth [gpu11] 2022-06-04 03:28:05,291 (trainer:262) INFO: 14/20epoch started. Estimated time to finish: 2 hours, 38 minutes and 45.6 seconds [gpu11] 2022-06-04 03:29:11,835 (trainer:672) INFO: 14epoch:train:1-475batch: iter_time=7.303e-04, forward_time=0.049, loss_ctc=130.681, loss_transducer=65.240, loss=26.111, backward_time=0.045, optim_step_time=0.032, optim0_lr0=0.001, train_time=0.561 [gpu11] 2022-06-04 03:30:17,830 (trainer:672) INFO: 14epoch:train:476-950batch: iter_time=1.569e-04, forward_time=0.049, loss_ctc=125.373, loss_transducer=62.017, loss=24.907, backward_time=0.046, optim_step_time=0.032, optim0_lr0=0.001, train_time=0.555 [gpu11] 2022-06-04 03:31:24,094 (trainer:672) INFO: 14epoch:train:951-1425batch: iter_time=1.500e-04, forward_time=0.049, loss_ctc=126.636, loss_transducer=63.867, loss=25.464, backward_time=0.046, optim_step_time=0.032, optim0_lr0=0.001, train_time=0.558 [gpu11] 2022-06-04 03:32:31,126 (trainer:672) INFO: 14epoch:train:1426-1900batch: iter_time=1.507e-04, forward_time=0.050, loss_ctc=125.549, loss_transducer=62.303, loss=24.992, backward_time=0.046, optim_step_time=0.032, optim0_lr0=0.001, train_time=0.564 [gpu11] 2022-06-04 03:33:38,394 (trainer:672) INFO: 14epoch:train:1901-2375batch: iter_time=1.503e-04, forward_time=0.050, loss_ctc=130.507, loss_transducer=64.836, loss=25.997, backward_time=0.046, optim_step_time=0.032, optim0_lr0=0.001, train_time=0.566 [gpu11] 2022-06-04 03:34:45,162 (trainer:672) INFO: 14epoch:train:2376-2850batch: iter_time=1.453e-04, forward_time=0.050, loss_ctc=130.022, loss_transducer=64.642, loss=25.912, backward_time=0.046, optim_step_time=0.032, optim0_lr0=0.001, train_time=0.562 [gpu11] 2022-06-04 03:35:52,460 (trainer:672) INFO: 14epoch:train:2851-3325batch: iter_time=1.441e-04, forward_time=0.050, loss_ctc=129.690, loss_transducer=64.833, loss=25.935, backward_time=0.046, optim_step_time=0.032, optim0_lr0=0.001, train_time=0.567 [gpu11] 2022-06-04 03:36:58,031 (trainer:672) INFO: 14epoch:train:3326-3800batch: iter_time=1.478e-04, forward_time=0.048, loss_ctc=124.966, loss_transducer=62.230, loss=24.930, backward_time=0.045, optim_step_time=0.032, optim0_lr0=0.001, train_time=0.552 [gpu11] 2022-06-04 03:38:04,135 (trainer:672) INFO: 14epoch:train:3801-4275batch: iter_time=1.632e-04, forward_time=0.049, loss_ctc=125.615, loss_transducer=62.653, loss=25.084, backward_time=0.045, optim_step_time=0.032, optim0_lr0=0.001, train_time=0.556 [gpu11] 2022-06-04 03:39:10,518 (trainer:672) INFO: 14epoch:train:4276-4750batch: iter_time=1.595e-04, forward_time=0.049, loss_ctc=131.825, loss_transducer=65.803, loss=26.338, backward_time=0.045, optim_step_time=0.032, optim0_lr0=0.001, train_time=0.559 [gpu11] 2022-06-04 03:40:15,875 (trainer:672) INFO: 14epoch:train:4751-5225batch: iter_time=1.614e-04, forward_time=0.048, loss_ctc=127.068, loss_transducer=62.478, loss=25.150, backward_time=0.044, optim_step_time=0.032, optim0_lr0=0.001, train_time=0.550 [gpu11] 2022-06-04 03:41:22,321 (trainer:672) INFO: 14epoch:train:5226-5700batch: iter_time=1.654e-04, forward_time=0.049, loss_ctc=123.221, loss_transducer=60.869, loss=24.459, backward_time=0.045, optim_step_time=0.032, optim0_lr0=0.001, train_time=0.559 [gpu11] 2022-06-04 03:42:29,135 (trainer:672) INFO: 14epoch:train:5701-6175batch: iter_time=1.636e-04, forward_time=0.049, loss_ctc=128.218, loss_transducer=64.242, loss=25.677, backward_time=0.045, optim_step_time=0.032, optim0_lr0=0.001, train_time=0.563 [gpu11] 2022-06-04 03:43:34,739 (trainer:672) INFO: 14epoch:train:6176-6650batch: iter_time=1.652e-04, forward_time=0.048, loss_ctc=126.743, loss_transducer=62.616, loss=25.160, backward_time=0.044, optim_step_time=0.032, optim0_lr0=0.001, train_time=0.552 [gpu11] 2022-06-04 03:44:40,382 (trainer:672) INFO: 14epoch:train:6651-7125batch: iter_time=1.632e-04, forward_time=0.048, loss_ctc=123.259, loss_transducer=61.232, loss=24.552, backward_time=0.044, optim_step_time=0.032, optim0_lr0=0.001, train_time=0.552 [gpu11] 2022-06-04 03:45:47,195 (trainer:672) INFO: 14epoch:train:7126-7600batch: iter_time=1.604e-04, forward_time=0.049, loss_ctc=126.139, loss_transducer=63.343, loss=25.296, backward_time=0.045, optim_step_time=0.032, optim0_lr0=0.001, train_time=0.562 [gpu11] 2022-06-04 03:46:53,526 (trainer:672) INFO: 14epoch:train:7601-8075batch: iter_time=1.509e-04, forward_time=0.049, loss_ctc=130.677, loss_transducer=65.317, loss=26.130, backward_time=0.045, optim_step_time=0.032, optim0_lr0=0.001, train_time=0.559 [gpu11] 2022-06-04 03:48:00,774 (trainer:672) INFO: 14epoch:train:8076-8550batch: iter_time=1.670e-04, forward_time=0.049, loss_ctc=129.146, loss_transducer=63.737, loss=25.620, backward_time=0.045, optim_step_time=0.032, optim0_lr0=0.001, train_time=0.565 [gpu11] 2022-06-04 03:49:07,500 (trainer:672) INFO: 14epoch:train:8551-9025batch: iter_time=1.628e-04, forward_time=0.049, loss_ctc=129.415, loss_transducer=64.738, loss=25.891, backward_time=0.045, optim_step_time=0.032, optim0_lr0=0.001, train_time=0.562 [gpu11] 2022-06-04 03:50:14,012 (trainer:672) INFO: 14epoch:train:9026-9500batch: iter_time=1.562e-04, forward_time=0.049, loss_ctc=130.938, loss_transducer=65.023, loss=26.076, backward_time=0.045, optim_step_time=0.032, optim0_lr0=0.001, train_time=0.560 [gpu11] 2022-06-04 03:50:29,031 (trainer:328) INFO: 14epoch results: [train] iter_time=1.857e-04, forward_time=0.049, loss_ctc=127.734, loss_transducer=63.573, loss=25.473, backward_time=0.045, optim_step_time=0.032, optim0_lr0=0.001, train_time=0.559, time=22 minutes and 9.4 seconds, total_count=133042, gpu_max_cached_mem_GB=15.092, [valid] loss_ctc=156.482, cer_ctc=0.976, loss_transducer=73.162, cer_transducer=nan, wer_transducer=nan, loss=120.106, time=14.34 seconds, total_count=3934, gpu_max_cached_mem_GB=15.092 [gpu11] 2022-06-04 03:50:31,957 (trainer:376) INFO: The best model has been updated: valid.loss [gpu11] 2022-06-04 03:50:32,014 (trainer:430) INFO: The model files were removed: exp/asr_IndEng188h-rnnt-600bpe/10epoch.pth, exp/asr_IndEng188h-rnnt-600bpe/13epoch.pth [gpu11] 2022-06-04 03:50:32,015 (trainer:262) INFO: 15/20epoch started. Estimated time to finish: 2 hours, 15 minutes and 58.77 seconds [gpu11] 2022-06-04 03:51:38,723 (trainer:672) INFO: 15epoch:train:1-475batch: iter_time=7.117e-04, forward_time=0.049, loss_ctc=127.694, loss_transducer=61.287, loss=24.899, backward_time=0.045, optim_step_time=0.032, optim0_lr0=0.001, train_time=0.562 [gpu11] 2022-06-04 03:52:44,797 (trainer:672) INFO: 15epoch:train:476-950batch: iter_time=1.606e-04, forward_time=0.048, loss_ctc=127.775, loss_transducer=62.665, loss=25.249, backward_time=0.045, optim_step_time=0.032, optim0_lr0=0.001, train_time=0.556 [gpu11] 2022-06-04 03:53:51,611 (trainer:672) INFO: 15epoch:train:951-1425batch: iter_time=1.603e-04, forward_time=0.049, loss_ctc=127.151, loss_transducer=62.540, loss=25.171, backward_time=0.045, optim_step_time=0.032, optim0_lr0=0.001, train_time=0.562 [gpu11] 2022-06-04 03:54:57,892 (trainer:672) INFO: 15epoch:train:1426-1900batch: iter_time=1.613e-04, forward_time=0.049, loss_ctc=125.436, loss_transducer=61.725, loss=24.839, backward_time=0.045, optim_step_time=0.032, optim0_lr0=0.001, train_time=0.558 [gpu11] 2022-06-04 03:56:03,888 (trainer:672) INFO: 15epoch:train:1901-2375batch: iter_time=1.520e-04, forward_time=0.049, loss_ctc=132.936, loss_transducer=65.212, loss=26.273, backward_time=0.045, optim_step_time=0.032, optim0_lr0=0.001, train_time=0.556 [gpu11] 2022-06-04 03:57:10,573 (trainer:672) INFO: 15epoch:train:2376-2850batch: iter_time=1.413e-04, forward_time=0.049, loss_ctc=129.450, loss_transducer=63.695, loss=25.633, backward_time=0.046, optim_step_time=0.032, optim0_lr0=0.001, train_time=0.561 [gpu11] 2022-06-04 03:58:17,537 (trainer:672) INFO: 15epoch:train:2851-3325batch: iter_time=1.410e-04, forward_time=0.050, loss_ctc=127.378, loss_transducer=62.853, loss=25.267, backward_time=0.046, optim_step_time=0.032, optim0_lr0=0.001, train_time=0.563 [gpu11] 2022-06-04 03:59:23,785 (trainer:672) INFO: 15epoch:train:3326-3800batch: iter_time=1.507e-04, forward_time=0.050, loss_ctc=126.727, loss_transducer=62.582, loss=25.150, backward_time=0.046, optim_step_time=0.032, optim0_lr0=0.001, train_time=0.557 [gpu11] 2022-06-04 04:00:30,867 (trainer:672) INFO: 15epoch:train:3801-4275batch: iter_time=1.537e-04, forward_time=0.050, loss_ctc=129.100, loss_transducer=63.332, loss=25.516, backward_time=0.046, optim_step_time=0.032, optim0_lr0=0.001, train_time=0.565 [gpu11] 2022-06-04 04:01:37,069 (trainer:672) INFO: 15epoch:train:4276-4750batch: iter_time=1.577e-04, forward_time=0.049, loss_ctc=127.266, loss_transducer=63.361, loss=25.385, backward_time=0.045, optim_step_time=0.031, optim0_lr0=0.001, train_time=0.558 [gpu11] 2022-06-04 04:02:42,901 (trainer:672) INFO: 15epoch:train:4751-5225batch: iter_time=1.589e-04, forward_time=0.049, loss_ctc=127.093, loss_transducer=62.460, loss=25.147, backward_time=0.045, optim_step_time=0.031, optim0_lr0=0.001, train_time=0.554 [gpu11] 2022-06-04 04:03:49,017 (trainer:672) INFO: 15epoch:train:5226-5700batch: iter_time=1.608e-04, forward_time=0.049, loss_ctc=122.105, loss_transducer=59.879, loss=24.128, backward_time=0.045, optim_step_time=0.032, optim0_lr0=0.001, train_time=0.556 [gpu11] 2022-06-04 04:04:56,300 (trainer:672) INFO: 15epoch:train:5701-6175batch: iter_time=1.512e-04, forward_time=0.050, loss_ctc=124.899, loss_transducer=59.701, loss=24.293, backward_time=0.046, optim_step_time=0.033, optim0_lr0=0.001, train_time=0.567 [gpu11] 2022-06-04 04:06:03,302 (trainer:672) INFO: 15epoch:train:6176-6650batch: iter_time=1.509e-04, forward_time=0.050, loss_ctc=124.617, loss_transducer=60.651, loss=24.509, backward_time=0.046, optim_step_time=0.033, optim0_lr0=0.001, train_time=0.563 [gpu11] 2022-06-04 04:07:09,461 (trainer:672) INFO: 15epoch:train:6651-7125batch: iter_time=1.506e-04, forward_time=0.049, loss_ctc=128.457, loss_transducer=62.785, loss=25.330, backward_time=0.045, optim_step_time=0.033, optim0_lr0=0.001, train_time=0.557 [gpu11] 2022-06-04 04:08:17,134 (trainer:672) INFO: 15epoch:train:7126-7600batch: iter_time=1.515e-04, forward_time=0.050, loss_ctc=125.580, loss_transducer=61.511, loss=24.796, backward_time=0.046, optim_step_time=0.033, optim0_lr0=0.001, train_time=0.569 [gpu11] 2022-06-04 04:09:24,373 (trainer:672) INFO: 15epoch:train:7601-8075batch: iter_time=1.524e-04, forward_time=0.050, loss_ctc=130.280, loss_transducer=64.977, loss=26.015, backward_time=0.046, optim_step_time=0.033, optim0_lr0=0.001, train_time=0.567 [gpu11] 2022-06-04 04:10:30,832 (trainer:672) INFO: 15epoch:train:8076-8550batch: iter_time=1.465e-04, forward_time=0.049, loss_ctc=124.055, loss_transducer=61.020, loss=24.559, backward_time=0.046, optim_step_time=0.033, optim0_lr0=0.001, train_time=0.559 [gpu11] 2022-06-04 04:11:38,052 (trainer:672) INFO: 15epoch:train:8551-9025batch: iter_time=1.489e-04, forward_time=0.050, loss_ctc=131.966, loss_transducer=66.024, loss=26.404, backward_time=0.046, optim_step_time=0.032, optim0_lr0=0.001, train_time=0.566 [gpu11] 2022-06-04 04:12:44,823 (trainer:672) INFO: 15epoch:train:9026-9500batch: iter_time=1.488e-04, forward_time=0.049, loss_ctc=129.581, loss_transducer=64.277, loss=25.788, backward_time=0.046, optim_step_time=0.032, optim0_lr0=0.001, train_time=0.562 [gpu11] 2022-06-04 04:12:59,954 (trainer:328) INFO: 15epoch results: [train] iter_time=1.805e-04, forward_time=0.049, loss_ctc=127.448, loss_transducer=62.603, loss=25.209, backward_time=0.045, optim_step_time=0.032, optim0_lr0=0.001, train_time=0.561, time=22 minutes and 13.57 seconds, total_count=142545, gpu_max_cached_mem_GB=15.092, [valid] loss_ctc=171.228, cer_ctc=0.974, loss_transducer=74.905, cer_transducer=nan, wer_transducer=nan, loss=126.273, time=14.37 seconds, total_count=4215, gpu_max_cached_mem_GB=15.092 [gpu11] 2022-06-04 04:13:02,824 (trainer:374) INFO: There are no improvements in this epoch [gpu11] 2022-06-04 04:13:02,825 (trainer:262) INFO: 16/20epoch started. Estimated time to finish: 1 hour, 53 minutes and 15.98 seconds [gpu11] 2022-06-04 04:14:09,536 (trainer:672) INFO: 16epoch:train:1-475batch: iter_time=6.111e-04, forward_time=0.049, loss_ctc=126.147, loss_transducer=61.189, loss=24.758, backward_time=0.047, optim_step_time=0.032, optim0_lr0=0.001, train_time=0.562 [gpu11] 2022-06-04 04:15:16,004 (trainer:672) INFO: 16epoch:train:476-950batch: iter_time=1.480e-04, forward_time=0.049, loss_ctc=127.644, loss_transducer=61.615, loss=24.977, backward_time=0.047, optim_step_time=0.032, optim0_lr0=0.001, train_time=0.560 [gpu11] 2022-06-04 04:16:23,237 (trainer:672) INFO: 16epoch:train:951-1425batch: iter_time=1.488e-04, forward_time=0.049, loss_ctc=128.001, loss_transducer=61.571, loss=24.993, backward_time=0.049, optim_step_time=0.032, optim0_lr0=0.001, train_time=0.566 [gpu11] 2022-06-04 04:17:30,525 (trainer:672) INFO: 16epoch:train:1426-1900batch: iter_time=1.578e-04, forward_time=0.049, loss_ctc=126.788, loss_transducer=61.406, loss=24.861, backward_time=0.049, optim_step_time=0.032, optim0_lr0=0.001, train_time=0.566 [gpu11] 2022-06-04 04:18:38,070 (trainer:672) INFO: 16epoch:train:1901-2375batch: iter_time=1.521e-04, forward_time=0.050, loss_ctc=125.894, loss_transducer=61.321, loss=24.772, backward_time=0.048, optim_step_time=0.032, optim0_lr0=0.001, train_time=0.569 [gpu11] 2022-06-04 04:19:44,413 (trainer:672) INFO: 16epoch:train:2376-2850batch: iter_time=1.491e-04, forward_time=0.049, loss_ctc=126.560, loss_transducer=61.822, loss=24.947, backward_time=0.048, optim_step_time=0.032, optim0_lr0=0.001, train_time=0.558 [gpu11] 2022-06-04 04:20:50,898 (trainer:672) INFO: 16epoch:train:2851-3325batch: iter_time=1.748e-04, forward_time=0.049, loss_ctc=127.561, loss_transducer=62.398, loss=25.167, backward_time=0.047, optim_step_time=0.032, optim0_lr0=0.001, train_time=0.559 [gpu11] 2022-06-04 04:21:59,144 (trainer:672) INFO: 16epoch:train:3326-3800batch: iter_time=1.737e-04, forward_time=0.051, loss_ctc=128.804, loss_transducer=62.790, loss=25.358, backward_time=0.047, optim_step_time=0.031, optim0_lr0=0.001, train_time=0.574 [gpu11] 2022-06-04 04:23:05,717 (trainer:672) INFO: 16epoch:train:3801-4275batch: iter_time=1.521e-04, forward_time=0.049, loss_ctc=127.360, loss_transducer=61.560, loss=24.942, backward_time=0.048, optim_step_time=0.032, optim0_lr0=0.001, train_time=0.561 [gpu11] 2022-06-04 04:24:12,876 (trainer:672) INFO: 16epoch:train:4276-4750batch: iter_time=1.665e-04, forward_time=0.049, loss_ctc=125.216, loss_transducer=60.853, loss=24.604, backward_time=0.047, optim_step_time=0.032, optim0_lr0=0.001, train_time=0.565 [gpu11] 2022-06-04 04:25:20,625 (trainer:672) INFO: 16epoch:train:4751-5225batch: iter_time=1.666e-04, forward_time=0.050, loss_ctc=129.121, loss_transducer=62.743, loss=25.370, backward_time=0.048, optim_step_time=0.032, optim0_lr0=0.001, train_time=0.570 [gpu11] 2022-06-04 04:26:27,528 (trainer:672) INFO: 16epoch:train:5226-5700batch: iter_time=1.465e-04, forward_time=0.049, loss_ctc=124.782, loss_transducer=60.442, loss=24.469, backward_time=0.049, optim_step_time=0.032, optim0_lr0=0.001, train_time=0.563 [gpu11] 2022-06-04 04:27:34,436 (trainer:672) INFO: 16epoch:train:5701-6175batch: iter_time=1.450e-04, forward_time=0.049, loss_ctc=126.076, loss_transducer=61.394, loss=24.804, backward_time=0.048, optim_step_time=0.032, optim0_lr0=0.001, train_time=0.563 [gpu11] 2022-06-04 04:28:41,903 (trainer:672) INFO: 16epoch:train:6176-6650batch: iter_time=1.504e-04, forward_time=0.050, loss_ctc=129.360, loss_transducer=63.332, loss=25.535, backward_time=0.048, optim_step_time=0.032, optim0_lr0=0.001, train_time=0.568 [gpu11] 2022-06-04 04:29:50,113 (trainer:672) INFO: 16epoch:train:6651-7125batch: iter_time=1.939e-04, forward_time=0.051, loss_ctc=128.524, loss_transducer=63.195, loss=25.438, backward_time=0.047, optim_step_time=0.031, optim0_lr0=0.001, train_time=0.574 [gpu11] 2022-06-04 04:30:57,147 (trainer:672) INFO: 16epoch:train:7126-7600batch: iter_time=1.729e-04, forward_time=0.050, loss_ctc=126.874, loss_transducer=61.839, loss=24.975, backward_time=0.047, optim_step_time=0.031, optim0_lr0=0.001, train_time=0.564 [gpu11] 2022-06-04 04:32:04,805 (trainer:672) INFO: 16epoch:train:7601-8075batch: iter_time=1.672e-04, forward_time=0.050, loss_ctc=128.909, loss_transducer=63.284, loss=25.489, backward_time=0.047, optim_step_time=0.031, optim0_lr0=0.001, train_time=0.570 [gpu11] 2022-06-04 04:33:12,099 (trainer:672) INFO: 16epoch:train:8076-8550batch: iter_time=1.518e-04, forward_time=0.050, loss_ctc=125.539, loss_transducer=60.761, loss=24.606, backward_time=0.048, optim_step_time=0.032, optim0_lr0=0.001, train_time=0.567 [gpu11] 2022-06-04 04:34:18,995 (trainer:672) INFO: 16epoch:train:8551-9025batch: iter_time=1.532e-04, forward_time=0.050, loss_ctc=127.382, loss_transducer=62.296, loss=25.128, backward_time=0.046, optim_step_time=0.032, optim0_lr0=0.001, train_time=0.563 [gpu11] 2022-06-04 04:35:25,643 (trainer:672) INFO: 16epoch:train:9026-9500batch: iter_time=1.536e-04, forward_time=0.050, loss_ctc=126.078, loss_transducer=61.096, loss=24.730, backward_time=0.046, optim_step_time=0.032, optim0_lr0=0.001, train_time=0.561 [gpu11] 2022-06-04 04:35:40,815 (trainer:328) INFO: 16epoch results: [train] iter_time=1.817e-04, forward_time=0.049, loss_ctc=127.135, loss_transducer=61.847, loss=24.997, backward_time=0.047, optim_step_time=0.032, optim0_lr0=0.001, train_time=0.565, time=22 minutes and 23.58 seconds, total_count=152048, gpu_max_cached_mem_GB=15.092, [valid] loss_ctc=166.228, cer_ctc=0.974, loss_transducer=72.145, cer_transducer=nan, wer_transducer=nan, loss=122.013, time=14.41 seconds, total_count=4496, gpu_max_cached_mem_GB=15.092 [gpu11] 2022-06-04 04:35:43,783 (trainer:374) INFO: There are no improvements in this epoch [gpu11] 2022-06-04 04:35:43,839 (trainer:430) INFO: The model files were removed: exp/asr_IndEng188h-rnnt-600bpe/6epoch.pth, exp/asr_IndEng188h-rnnt-600bpe/15epoch.pth [gpu11] 2022-06-04 04:35:43,839 (trainer:262) INFO: 17/20epoch started. Estimated time to finish: 1 hour, 30 minutes and 37.24 seconds [gpu11] 2022-06-04 04:36:51,318 (trainer:672) INFO: 17epoch:train:1-475batch: iter_time=6.675e-04, forward_time=0.050, loss_ctc=127.061, loss_transducer=60.664, loss=24.696, backward_time=0.046, optim_step_time=0.032, optim0_lr0=0.001, train_time=0.568 [gpu11] 2022-06-04 04:37:58,016 (trainer:672) INFO: 17epoch:train:476-950batch: iter_time=1.716e-04, forward_time=0.049, loss_ctc=126.992, loss_transducer=60.484, loss=24.645, backward_time=0.046, optim_step_time=0.032, optim0_lr0=0.001, train_time=0.562 [gpu11] 2022-06-04 04:39:04,206 (trainer:672) INFO: 17epoch:train:951-1425batch: iter_time=1.709e-04, forward_time=0.048, loss_ctc=123.396, loss_transducer=58.461, loss=23.870, backward_time=0.045, optim_step_time=0.031, optim0_lr0=0.001, train_time=0.557 [gpu11] 2022-06-04 04:40:09,976 (trainer:672) INFO: 17epoch:train:1426-1900batch: iter_time=1.618e-04, forward_time=0.048, loss_ctc=128.548, loss_transducer=61.358, loss=24.981, backward_time=0.045, optim_step_time=0.031, optim0_lr0=0.001, train_time=0.553 [gpu11] 2022-06-04 04:41:15,774 (trainer:672) INFO: 17epoch:train:1901-2375batch: iter_time=1.622e-04, forward_time=0.048, loss_ctc=128.872, loss_transducer=62.219, loss=25.220, backward_time=0.045, optim_step_time=0.031, optim0_lr0=0.001, train_time=0.554 [gpu11] 2022-06-04 04:42:22,385 (trainer:672) INFO: 17epoch:train:2376-2850batch: iter_time=1.611e-04, forward_time=0.049, loss_ctc=127.897, loss_transducer=61.551, loss=24.980, backward_time=0.045, optim_step_time=0.031, optim0_lr0=0.001, train_time=0.561 [gpu11] 2022-06-04 04:43:27,860 (trainer:672) INFO: 17epoch:train:2851-3325batch: iter_time=1.622e-04, forward_time=0.048, loss_ctc=127.753, loss_transducer=61.807, loss=25.033, backward_time=0.045, optim_step_time=0.031, optim0_lr0=0.001, train_time=0.551 [gpu11] 2022-06-04 04:44:34,584 (trainer:672) INFO: 17epoch:train:3326-3800batch: iter_time=1.627e-04, forward_time=0.049, loss_ctc=127.958, loss_transducer=61.829, loss=25.054, backward_time=0.045, optim_step_time=0.031, optim0_lr0=0.001, train_time=0.561 [gpu11] 2022-06-04 04:45:40,904 (trainer:672) INFO: 17epoch:train:3801-4275batch: iter_time=1.614e-04, forward_time=0.049, loss_ctc=127.758, loss_transducer=61.841, loss=25.042, backward_time=0.044, optim_step_time=0.031, optim0_lr0=0.001, train_time=0.558 [gpu11] 2022-06-04 04:46:47,393 (trainer:672) INFO: 17epoch:train:4276-4750batch: iter_time=1.524e-04, forward_time=0.049, loss_ctc=125.766, loss_transducer=60.428, loss=24.540, backward_time=0.045, optim_step_time=0.031, optim0_lr0=0.001, train_time=0.560 [gpu11] 2022-06-04 04:47:54,317 (trainer:672) INFO: 17epoch:train:4751-5225batch: iter_time=1.600e-04, forward_time=0.049, loss_ctc=125.864, loss_transducer=61.172, loss=24.733, backward_time=0.045, optim_step_time=0.031, optim0_lr0=0.001, train_time=0.564 [gpu11] 2022-06-04 04:49:00,432 (trainer:672) INFO: 17epoch:train:5226-5700batch: iter_time=1.621e-04, forward_time=0.049, loss_ctc=125.677, loss_transducer=60.034, loss=24.434, backward_time=0.045, optim_step_time=0.031, optim0_lr0=0.001, train_time=0.556 [gpu11] 2022-06-04 04:50:05,993 (trainer:672) INFO: 17epoch:train:5701-6175batch: iter_time=1.525e-04, forward_time=0.048, loss_ctc=123.896, loss_transducer=59.265, loss=24.108, backward_time=0.044, optim_step_time=0.031, optim0_lr0=0.001, train_time=0.553 [gpu11] 2022-06-04 04:51:12,468 (trainer:672) INFO: 17epoch:train:6176-6650batch: iter_time=1.611e-04, forward_time=0.049, loss_ctc=130.574, loss_transducer=63.525, loss=25.674, backward_time=0.045, optim_step_time=0.031, optim0_lr0=0.001, train_time=0.559 [gpu11] 2022-06-04 04:52:17,728 (trainer:672) INFO: 17epoch:train:6651-7125batch: iter_time=1.579e-04, forward_time=0.048, loss_ctc=126.545, loss_transducer=61.182, loss=24.786, backward_time=0.045, optim_step_time=0.031, optim0_lr0=0.001, train_time=0.550 [gpu11] 2022-06-04 04:53:24,845 (trainer:672) INFO: 17epoch:train:7126-7600batch: iter_time=1.592e-04, forward_time=0.049, loss_ctc=131.796, loss_transducer=63.902, loss=25.860, backward_time=0.045, optim_step_time=0.031, optim0_lr0=0.001, train_time=0.565 [gpu11] 2022-06-04 04:54:31,149 (trainer:672) INFO: 17epoch:train:7601-8075batch: iter_time=1.596e-04, forward_time=0.049, loss_ctc=126.843, loss_transducer=60.506, loss=24.640, backward_time=0.045, optim_step_time=0.031, optim0_lr0=0.001, train_time=0.559 [gpu11] 2022-06-04 04:55:37,415 (trainer:672) INFO: 17epoch:train:8076-8550batch: iter_time=1.511e-04, forward_time=0.049, loss_ctc=128.077, loss_transducer=62.433, loss=25.214, backward_time=0.045, optim_step_time=0.031, optim0_lr0=0.001, train_time=0.557 [gpu11] 2022-06-04 04:56:43,961 (trainer:672) INFO: 17epoch:train:8551-9025batch: iter_time=1.628e-04, forward_time=0.049, loss_ctc=124.097, loss_transducer=59.603, loss=24.208, backward_time=0.045, optim_step_time=0.031, optim0_lr0=0.001, train_time=0.561 [gpu11] 2022-06-04 04:57:50,027 (trainer:672) INFO: 17epoch:train:9026-9500batch: iter_time=1.622e-04, forward_time=0.049, loss_ctc=124.895, loss_transducer=60.158, loss=24.407, backward_time=0.045, optim_step_time=0.031, optim0_lr0=0.001, train_time=0.556 [gpu11] 2022-06-04 04:58:05,218 (trainer:328) INFO: 17epoch results: [train] iter_time=1.861e-04, forward_time=0.049, loss_ctc=126.990, loss_transducer=61.106, loss=24.801, backward_time=0.045, optim_step_time=0.031, optim0_lr0=0.001, train_time=0.558, time=22 minutes and 6.93 seconds, total_count=161551, gpu_max_cached_mem_GB=15.092, [valid] loss_ctc=165.904, cer_ctc=0.974, loss_transducer=73.797, cer_transducer=nan, wer_transducer=nan, loss=123.568, time=14.44 seconds, total_count=4777, gpu_max_cached_mem_GB=15.092 [gpu11] 2022-06-04 04:58:08,155 (trainer:374) INFO: There are no improvements in this epoch [gpu11] 2022-06-04 04:58:08,185 (trainer:430) INFO: The model files were removed: exp/asr_IndEng188h-rnnt-600bpe/7epoch.pth [gpu11] 2022-06-04 04:58:08,185 (trainer:262) INFO: 18/20epoch started. Estimated time to finish: 1 hour, 7 minutes and 55.29 seconds [gpu11] 2022-06-04 04:59:14,108 (trainer:672) INFO: 18epoch:train:1-475batch: iter_time=6.772e-04, forward_time=0.049, loss_ctc=126.435, loss_transducer=59.641, loss=24.393, backward_time=0.045, optim_step_time=0.031, optim0_lr0=0.001, train_time=0.556 [gpu11] 2022-06-04 05:00:20,763 (trainer:672) INFO: 18epoch:train:476-950batch: iter_time=1.648e-04, forward_time=0.050, loss_ctc=123.596, loss_transducer=57.821, loss=23.725, backward_time=0.046, optim_step_time=0.032, optim0_lr0=0.001, train_time=0.561 [gpu11] 2022-06-04 05:01:27,058 (trainer:672) INFO: 18epoch:train:951-1425batch: iter_time=1.595e-04, forward_time=0.049, loss_ctc=125.103, loss_transducer=59.408, loss=24.235, backward_time=0.045, optim_step_time=0.031, optim0_lr0=0.001, train_time=0.558 [gpu11] 2022-06-04 05:02:33,788 (trainer:672) INFO: 18epoch:train:1426-1900batch: iter_time=1.597e-04, forward_time=0.049, loss_ctc=128.152, loss_transducer=60.845, loss=24.823, backward_time=0.045, optim_step_time=0.031, optim0_lr0=0.001, train_time=0.561 [gpu11] 2022-06-04 05:03:39,612 (trainer:672) INFO: 18epoch:train:1901-2375batch: iter_time=1.635e-04, forward_time=0.049, loss_ctc=123.238, loss_transducer=58.389, loss=23.840, backward_time=0.045, optim_step_time=0.031, optim0_lr0=0.001, train_time=0.554 [gpu11] 2022-06-04 05:04:45,990 (trainer:672) INFO: 18epoch:train:2376-2850batch: iter_time=1.639e-04, forward_time=0.049, loss_ctc=127.195, loss_transducer=60.772, loss=24.733, backward_time=0.045, optim_step_time=0.031, optim0_lr0=0.001, train_time=0.559 [gpu11] 2022-06-04 05:05:52,986 (trainer:672) INFO: 18epoch:train:2851-3325batch: iter_time=1.619e-04, forward_time=0.049, loss_ctc=124.955, loss_transducer=59.086, loss=24.143, backward_time=0.045, optim_step_time=0.031, optim0_lr0=0.001, train_time=0.564 [gpu11] 2022-06-04 05:06:59,779 (trainer:672) INFO: 18epoch:train:3326-3800batch: iter_time=1.652e-04, forward_time=0.049, loss_ctc=128.942, loss_transducer=61.012, loss=24.924, backward_time=0.045, optim_step_time=0.031, optim0_lr0=0.001, train_time=0.562 [gpu11] 2022-06-04 05:08:06,210 (trainer:672) INFO: 18epoch:train:3801-4275batch: iter_time=1.604e-04, forward_time=0.049, loss_ctc=128.195, loss_transducer=61.045, loss=24.876, backward_time=0.045, optim_step_time=0.031, optim0_lr0=0.001, train_time=0.559 [gpu11] 2022-06-04 05:09:11,980 (trainer:672) INFO: 18epoch:train:4276-4750batch: iter_time=1.502e-04, forward_time=0.049, loss_ctc=130.673, loss_transducer=62.791, loss=25.498, backward_time=0.045, optim_step_time=0.031, optim0_lr0=0.001, train_time=0.554 [gpu11] 2022-06-04 05:10:18,236 (trainer:672) INFO: 18epoch:train:4751-5225batch: iter_time=1.618e-04, forward_time=0.049, loss_ctc=130.429, loss_transducer=62.776, loss=25.476, backward_time=0.045, optim_step_time=0.031, optim0_lr0=0.001, train_time=0.558 [gpu11] 2022-06-04 05:11:24,973 (trainer:672) INFO: 18epoch:train:5226-5700batch: iter_time=1.633e-04, forward_time=0.049, loss_ctc=133.119, loss_transducer=64.017, loss=25.988, backward_time=0.045, optim_step_time=0.031, optim0_lr0=0.001, train_time=0.561 [gpu11] 2022-06-04 05:12:30,741 (trainer:672) INFO: 18epoch:train:5701-6175batch: iter_time=1.655e-04, forward_time=0.049, loss_ctc=124.774, loss_transducer=59.224, loss=24.164, backward_time=0.045, optim_step_time=0.031, optim0_lr0=0.001, train_time=0.554 [gpu11] 2022-06-04 05:13:37,314 (trainer:672) INFO: 18epoch:train:6176-6650batch: iter_time=1.551e-04, forward_time=0.050, loss_ctc=122.603, loss_transducer=58.191, loss=23.743, backward_time=0.045, optim_step_time=0.032, optim0_lr0=0.001, train_time=0.560 [gpu11] 2022-06-04 05:14:44,457 (trainer:672) INFO: 18epoch:train:6651-7125batch: iter_time=1.610e-04, forward_time=0.050, loss_ctc=127.437, loss_transducer=61.051, loss=24.821, backward_time=0.046, optim_step_time=0.032, optim0_lr0=0.001, train_time=0.565 [gpu11] 2022-06-04 05:15:51,942 (trainer:672) INFO: 18epoch:train:7126-7600batch: iter_time=1.543e-04, forward_time=0.050, loss_ctc=127.385, loss_transducer=60.905, loss=24.780, backward_time=0.046, optim_step_time=0.033, optim0_lr0=0.001, train_time=0.568 [gpu11] 2022-06-04 05:16:58,958 (trainer:672) INFO: 18epoch:train:7601-8075batch: iter_time=1.500e-04, forward_time=0.050, loss_ctc=128.957, loss_transducer=61.988, loss=25.169, backward_time=0.046, optim_step_time=0.032, optim0_lr0=0.001, train_time=0.565 [gpu11] 2022-06-04 05:18:05,309 (trainer:672) INFO: 18epoch:train:8076-8550batch: iter_time=1.513e-04, forward_time=0.049, loss_ctc=124.190, loss_transducer=59.679, loss=24.234, backward_time=0.046, optim_step_time=0.032, optim0_lr0=0.001, train_time=0.559 [gpu11] 2022-06-04 05:19:11,503 (trainer:672) INFO: 18epoch:train:8551-9025batch: iter_time=1.508e-04, forward_time=0.049, loss_ctc=126.060, loss_transducer=60.425, loss=24.561, backward_time=0.045, optim_step_time=0.032, optim0_lr0=0.001, train_time=0.557 [gpu11] 2022-06-04 05:20:18,276 (trainer:672) INFO: 18epoch:train:9026-9500batch: iter_time=1.465e-04, forward_time=0.049, loss_ctc=127.181, loss_transducer=61.463, loss=24.904, backward_time=0.045, optim_step_time=0.032, optim0_lr0=0.001, train_time=0.562 [gpu11] 2022-06-04 05:20:33,356 (trainer:328) INFO: 18epoch results: [train] iter_time=1.843e-04, forward_time=0.049, loss_ctc=126.882, loss_transducer=60.500, loss=24.641, backward_time=0.045, optim_step_time=0.032, optim0_lr0=0.001, train_time=0.560, time=22 minutes and 10.83 seconds, total_count=171054, gpu_max_cached_mem_GB=15.092, [valid] loss_ctc=170.736, cer_ctc=0.975, loss_transducer=71.663, cer_transducer=nan, wer_transducer=nan, loss=122.884, time=14.34 seconds, total_count=5058, gpu_max_cached_mem_GB=15.092 [gpu11] 2022-06-04 05:20:36,257 (trainer:374) INFO: There are no improvements in this epoch [gpu11] 2022-06-04 05:20:36,284 (trainer:430) INFO: The model files were removed: exp/asr_IndEng188h-rnnt-600bpe/17epoch.pth [gpu11] 2022-06-04 05:20:36,284 (trainer:262) INFO: 19/20epoch started. Estimated time to finish: 45 minutes and 15.71 seconds [gpu11] 2022-06-04 05:21:44,458 (trainer:672) INFO: 19epoch:train:1-475batch: iter_time=6.598e-04, forward_time=0.050, loss_ctc=127.224, loss_transducer=59.192, loss=24.340, backward_time=0.048, optim_step_time=0.032, optim0_lr0=0.001, train_time=0.575 [gpu11] 2022-06-04 05:22:50,515 (trainer:672) INFO: 19epoch:train:476-950batch: iter_time=1.575e-04, forward_time=0.049, loss_ctc=122.490, loss_transducer=57.519, loss=23.567, backward_time=0.045, optim_step_time=0.032, optim0_lr0=0.001, train_time=0.556 [gpu11] 2022-06-04 05:23:56,339 (trainer:672) INFO: 19epoch:train:951-1425batch: iter_time=1.580e-04, forward_time=0.049, loss_ctc=126.241, loss_transducer=58.923, loss=24.199, backward_time=0.045, optim_step_time=0.032, optim0_lr0=0.001, train_time=0.554 [gpu11] 2022-06-04 05:25:02,584 (trainer:672) INFO: 19epoch:train:1426-1900batch: iter_time=1.504e-04, forward_time=0.049, loss_ctc=127.117, loss_transducer=60.143, loss=24.570, backward_time=0.045, optim_step_time=0.032, optim0_lr0=0.001, train_time=0.558 [gpu11] 2022-06-04 05:26:08,907 (trainer:672) INFO: 19epoch:train:1901-2375batch: iter_time=1.475e-04, forward_time=0.049, loss_ctc=129.699, loss_transducer=61.567, loss=25.119, backward_time=0.045, optim_step_time=0.032, optim0_lr0=0.001, train_time=0.559 [gpu11] 2022-06-04 05:27:15,208 (trainer:672) INFO: 19epoch:train:2376-2850batch: iter_time=1.448e-04, forward_time=0.049, loss_ctc=128.957, loss_transducer=60.885, loss=24.893, backward_time=0.045, optim_step_time=0.032, optim0_lr0=0.001, train_time=0.558 [gpu11] 2022-06-04 05:28:21,821 (trainer:672) INFO: 19epoch:train:2851-3325batch: iter_time=1.413e-04, forward_time=0.049, loss_ctc=127.783, loss_transducer=60.095, loss=24.607, backward_time=0.045, optim_step_time=0.032, optim0_lr0=0.001, train_time=0.561 [gpu11] 2022-06-04 05:29:28,674 (trainer:672) INFO: 19epoch:train:3326-3800batch: iter_time=1.472e-04, forward_time=0.050, loss_ctc=125.442, loss_transducer=59.173, loss=24.201, backward_time=0.046, optim_step_time=0.032, optim0_lr0=0.001, train_time=0.562 [gpu11] 2022-06-04 05:30:36,016 (trainer:672) INFO: 19epoch:train:3801-4275batch: iter_time=1.501e-04, forward_time=0.050, loss_ctc=126.198, loss_transducer=60.227, loss=24.522, backward_time=0.047, optim_step_time=0.032, optim0_lr0=0.001, train_time=0.567 [gpu11] 2022-06-04 05:31:42,772 (trainer:672) INFO: 19epoch:train:4276-4750batch: iter_time=1.502e-04, forward_time=0.050, loss_ctc=126.560, loss_transducer=59.806, loss=24.444, backward_time=0.046, optim_step_time=0.032, optim0_lr0=0.001, train_time=0.562 [gpu11] 2022-06-04 05:32:49,127 (trainer:672) INFO: 19epoch:train:4751-5225batch: iter_time=1.418e-04, forward_time=0.049, loss_ctc=122.880, loss_transducer=57.802, loss=23.667, backward_time=0.045, optim_step_time=0.032, optim0_lr0=0.001, train_time=0.559 [gpu11] 2022-06-04 05:33:55,118 (trainer:672) INFO: 19epoch:train:5226-5700batch: iter_time=1.439e-04, forward_time=0.049, loss_ctc=126.277, loss_transducer=60.248, loss=24.533, backward_time=0.045, optim_step_time=0.032, optim0_lr0=0.001, train_time=0.555 [gpu11] 2022-06-04 05:35:01,483 (trainer:672) INFO: 19epoch:train:5701-6175batch: iter_time=1.457e-04, forward_time=0.049, loss_ctc=127.545, loss_transducer=60.223, loss=24.622, backward_time=0.045, optim_step_time=0.032, optim0_lr0=0.001, train_time=0.558 [gpu11] 2022-06-04 05:36:08,509 (trainer:672) INFO: 19epoch:train:6176-6650batch: iter_time=1.463e-04, forward_time=0.050, loss_ctc=126.954, loss_transducer=60.067, loss=24.538, backward_time=0.046, optim_step_time=0.032, optim0_lr0=0.001, train_time=0.565 [gpu11] 2022-06-04 05:37:15,621 (trainer:672) INFO: 19epoch:train:6651-7125batch: iter_time=1.455e-04, forward_time=0.049, loss_ctc=126.138, loss_transducer=60.169, loss=24.503, backward_time=0.048, optim_step_time=0.032, optim0_lr0=0.001, train_time=0.565 [gpu11] 2022-06-04 05:38:23,416 (trainer:672) INFO: 19epoch:train:7126-7600batch: iter_time=1.516e-04, forward_time=0.050, loss_ctc=130.335, loss_transducer=61.865, loss=25.241, backward_time=0.049, optim_step_time=0.032, optim0_lr0=0.001, train_time=0.570 [gpu11] 2022-06-04 05:39:31,010 (trainer:672) INFO: 19epoch:train:7601-8075batch: iter_time=1.539e-04, forward_time=0.050, loss_ctc=125.926, loss_transducer=60.007, loss=24.446, backward_time=0.049, optim_step_time=0.032, optim0_lr0=0.001, train_time=0.569 [gpu11] 2022-06-04 05:40:38,989 (trainer:672) INFO: 19epoch:train:8076-8550batch: iter_time=1.508e-04, forward_time=0.050, loss_ctc=128.452, loss_transducer=60.501, loss=24.759, backward_time=0.049, optim_step_time=0.032, optim0_lr0=0.001, train_time=0.572 [gpu11] 2022-06-04 05:41:46,413 (trainer:672) INFO: 19epoch:train:8551-9025batch: iter_time=1.697e-04, forward_time=0.050, loss_ctc=126.925, loss_transducer=59.853, loss=24.483, backward_time=0.048, optim_step_time=0.032, optim0_lr0=0.001, train_time=0.568 [gpu11] 2022-06-04 05:42:53,604 (trainer:672) INFO: 19epoch:train:9026-9500batch: iter_time=1.586e-04, forward_time=0.049, loss_ctc=126.388, loss_transducer=59.350, loss=24.317, backward_time=0.047, optim_step_time=0.032, optim0_lr0=0.001, train_time=0.565 [gpu11] 2022-06-04 05:43:08,783 (trainer:328) INFO: 19epoch results: [train] iter_time=1.757e-04, forward_time=0.049, loss_ctc=126.758, loss_transducer=59.874, loss=24.475, backward_time=0.046, optim_step_time=0.032, optim0_lr0=0.001, train_time=0.563, time=22 minutes and 18.08 seconds, total_count=180557, gpu_max_cached_mem_GB=15.092, [valid] loss_ctc=166.570, cer_ctc=0.974, loss_transducer=71.995, cer_transducer=nan, wer_transducer=nan, loss=121.966, time=14.42 seconds, total_count=5339, gpu_max_cached_mem_GB=15.092 [gpu11] 2022-06-04 05:43:11,699 (trainer:374) INFO: There are no improvements in this epoch [gpu11] 2022-06-04 05:43:11,725 (trainer:430) INFO: The model files were removed: exp/asr_IndEng188h-rnnt-600bpe/18epoch.pth [gpu11] 2022-06-04 05:43:11,725 (trainer:262) INFO: 20/20epoch started. Estimated time to finish: 22 minutes and 37.73 seconds [gpu11] 2022-06-04 05:44:17,959 (trainer:672) INFO: 20epoch:train:1-475batch: iter_time=6.617e-04, forward_time=0.049, loss_ctc=122.202, loss_transducer=56.534, loss=23.299, backward_time=0.045, optim_step_time=0.032, optim0_lr0=0.001, train_time=0.558 [gpu11] 2022-06-04 05:45:24,501 (trainer:672) INFO: 20epoch:train:476-950batch: iter_time=1.505e-04, forward_time=0.049, loss_ctc=125.599, loss_transducer=58.847, loss=24.132, backward_time=0.046, optim_step_time=0.032, optim0_lr0=0.001, train_time=0.560 [gpu11] 2022-06-04 05:46:31,500 (trainer:672) INFO: 20epoch:train:951-1425batch: iter_time=1.504e-04, forward_time=0.049, loss_ctc=125.939, loss_transducer=58.568, loss=24.088, backward_time=0.046, optim_step_time=0.032, optim0_lr0=0.001, train_time=0.564 [gpu11] 2022-06-04 05:47:38,858 (trainer:672) INFO: 20epoch:train:1426-1900batch: iter_time=1.514e-04, forward_time=0.050, loss_ctc=130.352, loss_transducer=60.952, loss=25.014, backward_time=0.046, optim_step_time=0.032, optim0_lr0=0.001, train_time=0.567 [gpu11] 2022-06-04 05:48:44,734 (trainer:672) INFO: 20epoch:train:1901-2375batch: iter_time=1.469e-04, forward_time=0.049, loss_ctc=125.870, loss_transducer=59.443, loss=24.301, backward_time=0.045, optim_step_time=0.032, optim0_lr0=0.001, train_time=0.555 [gpu11] 2022-06-04 05:49:51,753 (trainer:672) INFO: 20epoch:train:2376-2850batch: iter_time=1.445e-04, forward_time=0.049, loss_ctc=130.437, loss_transducer=61.722, loss=25.213, backward_time=0.045, optim_step_time=0.032, optim0_lr0=0.001, train_time=0.564 [gpu11] 2022-06-04 05:50:58,908 (trainer:672) INFO: 20epoch:train:2851-3325batch: iter_time=1.382e-04, forward_time=0.050, loss_ctc=128.694, loss_transducer=60.763, loss=24.843, backward_time=0.045, optim_step_time=0.032, optim0_lr0=0.001, train_time=0.566 [gpu11] 2022-06-04 05:52:05,569 (trainer:672) INFO: 20epoch:train:3326-3800batch: iter_time=1.456e-04, forward_time=0.049, loss_ctc=127.091, loss_transducer=60.066, loss=24.548, backward_time=0.046, optim_step_time=0.032, optim0_lr0=0.001, train_time=0.561 [gpu11] 2022-06-04 05:53:11,705 (trainer:672) INFO: 20epoch:train:3801-4275batch: iter_time=1.462e-04, forward_time=0.049, loss_ctc=125.467, loss_transducer=58.845, loss=24.121, backward_time=0.045, optim_step_time=0.032, optim0_lr0=0.001, train_time=0.557 [gpu11] 2022-06-04 05:54:18,845 (trainer:672) INFO: 20epoch:train:4276-4750batch: iter_time=1.467e-04, forward_time=0.050, loss_ctc=124.585, loss_transducer=58.480, loss=23.964, backward_time=0.045, optim_step_time=0.032, optim0_lr0=0.001, train_time=0.565 [gpu11] 2022-06-04 05:55:26,258 (trainer:672) INFO: 20epoch:train:4751-5225batch: iter_time=1.502e-04, forward_time=0.050, loss_ctc=127.227, loss_transducer=60.099, loss=24.567, backward_time=0.048, optim_step_time=0.032, optim0_lr0=0.001, train_time=0.567 [gpu11] 2022-06-04 05:56:33,317 (trainer:672) INFO: 20epoch:train:5226-5700batch: iter_time=1.565e-04, forward_time=0.049, loss_ctc=126.071, loss_transducer=59.591, loss=24.353, backward_time=0.049, optim_step_time=0.032, optim0_lr0=0.001, train_time=0.564 [gpu11] 2022-06-04 05:57:40,861 (trainer:672) INFO: 20epoch:train:5701-6175batch: iter_time=1.497e-04, forward_time=0.049, loss_ctc=125.350, loss_transducer=58.749, loss=24.089, backward_time=0.050, optim_step_time=0.032, optim0_lr0=0.001, train_time=0.569 [gpu11] 2022-06-04 05:58:47,977 (trainer:672) INFO: 20epoch:train:6176-6650batch: iter_time=1.502e-04, forward_time=0.049, loss_ctc=129.132, loss_transducer=60.701, loss=24.860, backward_time=0.049, optim_step_time=0.032, optim0_lr0=0.001, train_time=0.565 [gpu11] 2022-06-04 05:59:55,601 (trainer:672) INFO: 20epoch:train:6651-7125batch: iter_time=1.486e-04, forward_time=0.050, loss_ctc=126.655, loss_transducer=59.425, loss=24.355, backward_time=0.049, optim_step_time=0.032, optim0_lr0=0.001, train_time=0.569 [gpu11] 2022-06-04 06:01:02,706 (trainer:672) INFO: 20epoch:train:7126-7600batch: iter_time=1.586e-04, forward_time=0.049, loss_ctc=126.690, loss_transducer=59.236, loss=24.311, backward_time=0.048, optim_step_time=0.032, optim0_lr0=0.001, train_time=0.564 [gpu11] 2022-06-04 06:02:09,689 (trainer:672) INFO: 20epoch:train:7601-8075batch: iter_time=1.639e-04, forward_time=0.049, loss_ctc=126.695, loss_transducer=58.959, loss=24.242, backward_time=0.048, optim_step_time=0.032, optim0_lr0=0.001, train_time=0.564 [gpu11] 2022-06-04 06:03:17,230 (trainer:672) INFO: 20epoch:train:8076-8550batch: iter_time=1.587e-04, forward_time=0.050, loss_ctc=128.906, loss_transducer=61.067, loss=24.935, backward_time=0.048, optim_step_time=0.032, optim0_lr0=0.001, train_time=0.569 [gpu11] 2022-06-04 06:04:23,171 (trainer:672) INFO: 20epoch:train:8551-9025batch: iter_time=1.625e-04, forward_time=0.049, loss_ctc=123.330, loss_transducer=57.462, loss=23.615, backward_time=0.047, optim_step_time=0.032, optim0_lr0=0.001, train_time=0.555 [gpu11] 2022-06-04 06:05:29,980 (trainer:672) INFO: 20epoch:train:9026-9500batch: iter_time=1.588e-04, forward_time=0.049, loss_ctc=126.464, loss_transducer=59.726, loss=24.416, backward_time=0.046, optim_step_time=0.032, optim0_lr0=0.001, train_time=0.562 [gpu11] 2022-06-04 06:05:45,040 (trainer:328) INFO: 20epoch results: [train] iter_time=1.770e-04, forward_time=0.049, loss_ctc=126.622, loss_transducer=59.453, loss=24.360, backward_time=0.047, optim_step_time=0.032, optim0_lr0=0.001, train_time=0.563, time=22 minutes and 18.92 seconds, total_count=190060, gpu_max_cached_mem_GB=15.092, [valid] loss_ctc=166.003, cer_ctc=0.970, loss_transducer=70.975, cer_transducer=nan, wer_transducer=nan, loss=120.776, time=14.39 seconds, total_count=5620, gpu_max_cached_mem_GB=15.092 [gpu11] 2022-06-04 06:05:47,890 (trainer:374) INFO: There are no improvements in this epoch [gpu11] 2022-06-04 06:05:47,916 (trainer:430) INFO: The model files were removed: exp/asr_IndEng188h-rnnt-600bpe/16epoch.pth [gpu11] 2022-06-04 06:05:47,916 (trainer:448) INFO: The training was finished at 20 epochs [gpu11] 2022-06-04 06:05:47,940 (average_nbest_models:69) INFO: Averaging 3best models: criterion="valid.loss": exp/asr_IndEng188h-rnnt-600bpe/valid.loss.ave_3best.pth # Accounting: time=27161 threads=1 # Ended (code 0) at Sat Jun 4 06:05:49 IST 2022, elapsed time 27161 seconds