Arun
commit from arun
ff9a56f
# python3 -m espnet2.bin.asr_train --use_preprocessor true --bpemodel data/en_token_list/bpe_unigram600/bpe.model --token_type bpe --token_list data/en_token_list/bpe_unigram600/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev_English/wav.scp,speech,sound --valid_data_path_and_name_and_type dump/raw/dev_English/text,text,text --valid_shape_file exp/asr_stats_raw_en_bpe600/valid/speech_shape --valid_shape_file exp/asr_stats_raw_en_bpe600/valid/text_shape.bpe --resume true --fold_length 80000 --fold_length 150 --output_dir exp/asr_IndEng188h-rnnt-600bpe --config conf/tuning/train_conformer-rnn_transducer.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/asr_stats_raw_en_bpe600/train/feats_stats.npz --train_data_path_and_name_and_type dump/raw/train_English/wav.scp,speech,sound --train_data_path_and_name_and_type dump/raw/train_English/text,text,text --train_shape_file exp/asr_stats_raw_en_bpe600/train/speech_shape --train_shape_file exp/asr_stats_raw_en_bpe600/train/text_shape.bpe --ngpu 1 --multiprocessing_distributed True
# Started at Tue Jun 7 21:55:51 IST 2022
#
/speech/umeshs/espnet-v.202205/tools/anaconda/envs/espnet/bin/python3 /speech/umeshs/espnet-v.202205/espnet2/bin/asr_train.py --use_preprocessor true --bpemodel data/en_token_list/bpe_unigram600/bpe.model --token_type bpe --token_list data/en_token_list/bpe_unigram600/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev_English/wav.scp,speech,sound --valid_data_path_and_name_and_type dump/raw/dev_English/text,text,text --valid_shape_file exp/asr_stats_raw_en_bpe600/valid/speech_shape --valid_shape_file exp/asr_stats_raw_en_bpe600/valid/text_shape.bpe --resume true --fold_length 80000 --fold_length 150 --output_dir exp/asr_IndEng188h-rnnt-600bpe --config conf/tuning/train_conformer-rnn_transducer.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/asr_stats_raw_en_bpe600/train/feats_stats.npz --train_data_path_and_name_and_type dump/raw/train_English/wav.scp,speech,sound --train_data_path_and_name_and_type dump/raw/train_English/text,text,text --train_shape_file exp/asr_stats_raw_en_bpe600/train/speech_shape --train_shape_file exp/asr_stats_raw_en_bpe600/train/text_shape.bpe --ngpu 1 --multiprocessing_distributed True
[gpu11] 2022-06-07 21:55:53,981 (asr:399) INFO: Vocabulary size: 600
[gpu11] 2022-06-07 21:56:00,408 (abs_task:1149) INFO: pytorch.version=1.9.1, cuda.available=True, cudnn.version=8005, cudnn.benchmark=False, cudnn.deterministic=True
[gpu11] 2022-06-07 21:56:00,414 (abs_task:1150) INFO: Model structure:
ESPnetASRModel(
(frontend): DefaultFrontend(
(stft): Stft(n_fft=512, win_length=512, hop_length=160, center=True, normalized=False, onesided=True)
(frontend): Frontend()
(logmel): LogMel(sr=16000, n_fft=512, n_mels=80, fmin=0, fmax=8000.0, htk=False)
)
(specaug): SpecAug(
(time_warp): TimeWarp(window=5, mode=bicubic)
(freq_mask): MaskAlongAxis(mask_width_range=[0, 30], num_mask=2, axis=freq)
(time_mask): MaskAlongAxis(mask_width_range=[0, 40], num_mask=2, axis=time)
)
(normalize): GlobalMVN(stats_file=exp/asr_stats_raw_en_bpe600/train/feats_stats.npz, norm_means=True, norm_vars=True)
(encoder): ConformerEncoder(
(embed): Conv2dSubsampling(
(conv): Sequential(
(0): Conv2d(1, 512, kernel_size=(3, 3), stride=(2, 2))
(1): ReLU()
(2): Conv2d(512, 512, kernel_size=(3, 3), stride=(2, 2))
(3): ReLU()
)
(out): Sequential(
(0): Linear(in_features=9728, out_features=512, bias=True)
(1): RelPositionalEncoding(
(dropout): Dropout(p=0.1, inplace=False)
)
)
)
(encoders): MultiSequential(
(0): EncoderLayer(
(self_attn): RelPositionMultiHeadedAttention(
(linear_q): Linear(in_features=512, out_features=512, bias=True)
(linear_k): Linear(in_features=512, out_features=512, bias=True)
(linear_v): Linear(in_features=512, out_features=512, bias=True)
(linear_out): Linear(in_features=512, out_features=512, bias=True)
(dropout): Dropout(p=0.1, inplace=False)
(linear_pos): Linear(in_features=512, out_features=512, bias=False)
)
(feed_forward): PositionwiseFeedForward(
(w_1): Linear(in_features=512, out_features=2048, bias=True)
(w_2): Linear(in_features=2048, out_features=512, bias=True)
(dropout): Dropout(p=0.1, inplace=False)
(activation): Swish()
)
(feed_forward_macaron): PositionwiseFeedForward(
(w_1): Linear(in_features=512, out_features=2048, bias=True)
(w_2): Linear(in_features=2048, out_features=512, bias=True)
(dropout): Dropout(p=0.1, inplace=False)
(activation): Swish()
)
(conv_module): ConvolutionModule(
(pointwise_conv1): Conv1d(512, 1024, kernel_size=(1,), stride=(1,))
(depthwise_conv): Conv1d(512, 512, kernel_size=(15,), stride=(1,), padding=(7,), groups=512)
(norm): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(pointwise_conv2): Conv1d(512, 512, kernel_size=(1,), stride=(1,))
(activation): Swish()
)
(norm_ff): LayerNorm((512,), eps=1e-12, elementwise_affine=True)
(norm_mha): LayerNorm((512,), eps=1e-12, elementwise_affine=True)
(norm_ff_macaron): LayerNorm((512,), eps=1e-12, elementwise_affine=True)
(norm_conv): LayerNorm((512,), eps=1e-12, elementwise_affine=True)
(norm_final): LayerNorm((512,), eps=1e-12, elementwise_affine=True)
(dropout): Dropout(p=0.1, inplace=False)
)
(1): EncoderLayer(
(self_attn): RelPositionMultiHeadedAttention(
(linear_q): Linear(in_features=512, out_features=512, bias=True)
(linear_k): Linear(in_features=512, out_features=512, bias=True)
(linear_v): Linear(in_features=512, out_features=512, bias=True)
(linear_out): Linear(in_features=512, out_features=512, bias=True)
(dropout): Dropout(p=0.1, inplace=False)
(linear_pos): Linear(in_features=512, out_features=512, bias=False)
)
(feed_forward): PositionwiseFeedForward(
(w_1): Linear(in_features=512, out_features=2048, bias=True)
(w_2): Linear(in_features=2048, out_features=512, bias=True)
(dropout): Dropout(p=0.1, inplace=False)
(activation): Swish()
)
(feed_forward_macaron): PositionwiseFeedForward(
(w_1): Linear(in_features=512, out_features=2048, bias=True)
(w_2): Linear(in_features=2048, out_features=512, bias=True)
(dropout): Dropout(p=0.1, inplace=False)
(activation): Swish()
)
(conv_module): ConvolutionModule(
(pointwise_conv1): Conv1d(512, 1024, kernel_size=(1,), stride=(1,))
(depthwise_conv): Conv1d(512, 512, kernel_size=(15,), stride=(1,), padding=(7,), groups=512)
(norm): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(pointwise_conv2): Conv1d(512, 512, kernel_size=(1,), stride=(1,))
(activation): Swish()
)
(norm_ff): LayerNorm((512,), eps=1e-12, elementwise_affine=True)
(norm_mha): LayerNorm((512,), eps=1e-12, elementwise_affine=True)
(norm_ff_macaron): LayerNorm((512,), eps=1e-12, elementwise_affine=True)
(norm_conv): LayerNorm((512,), eps=1e-12, elementwise_affine=True)
(norm_final): LayerNorm((512,), eps=1e-12, elementwise_affine=True)
(dropout): Dropout(p=0.1, inplace=False)
)
(2): EncoderLayer(
(self_attn): RelPositionMultiHeadedAttention(
(linear_q): Linear(in_features=512, out_features=512, bias=True)
(linear_k): Linear(in_features=512, out_features=512, bias=True)
(linear_v): Linear(in_features=512, out_features=512, bias=True)
(linear_out): Linear(in_features=512, out_features=512, bias=True)
(dropout): Dropout(p=0.1, inplace=False)
(linear_pos): Linear(in_features=512, out_features=512, bias=False)
)
(feed_forward): PositionwiseFeedForward(
(w_1): Linear(in_features=512, out_features=2048, bias=True)
(w_2): Linear(in_features=2048, out_features=512, bias=True)
(dropout): Dropout(p=0.1, inplace=False)
(activation): Swish()
)
(feed_forward_macaron): PositionwiseFeedForward(
(w_1): Linear(in_features=512, out_features=2048, bias=True)
(w_2): Linear(in_features=2048, out_features=512, bias=True)
(dropout): Dropout(p=0.1, inplace=False)
(activation): Swish()
)
(conv_module): ConvolutionModule(
(pointwise_conv1): Conv1d(512, 1024, kernel_size=(1,), stride=(1,))
(depthwise_conv): Conv1d(512, 512, kernel_size=(15,), stride=(1,), padding=(7,), groups=512)
(norm): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(pointwise_conv2): Conv1d(512, 512, kernel_size=(1,), stride=(1,))
(activation): Swish()
)
(norm_ff): LayerNorm((512,), eps=1e-12, elementwise_affine=True)
(norm_mha): LayerNorm((512,), eps=1e-12, elementwise_affine=True)
(norm_ff_macaron): LayerNorm((512,), eps=1e-12, elementwise_affine=True)
(norm_conv): LayerNorm((512,), eps=1e-12, elementwise_affine=True)
(norm_final): LayerNorm((512,), eps=1e-12, elementwise_affine=True)
(dropout): Dropout(p=0.1, inplace=False)
)
(3): EncoderLayer(
(self_attn): RelPositionMultiHeadedAttention(
(linear_q): Linear(in_features=512, out_features=512, bias=True)
(linear_k): Linear(in_features=512, out_features=512, bias=True)
(linear_v): Linear(in_features=512, out_features=512, bias=True)
(linear_out): Linear(in_features=512, out_features=512, bias=True)
(dropout): Dropout(p=0.1, inplace=False)
(linear_pos): Linear(in_features=512, out_features=512, bias=False)
)
(feed_forward): PositionwiseFeedForward(
(w_1): Linear(in_features=512, out_features=2048, bias=True)
(w_2): Linear(in_features=2048, out_features=512, bias=True)
(dropout): Dropout(p=0.1, inplace=False)
(activation): Swish()
)
(feed_forward_macaron): PositionwiseFeedForward(
(w_1): Linear(in_features=512, out_features=2048, bias=True)
(w_2): Linear(in_features=2048, out_features=512, bias=True)
(dropout): Dropout(p=0.1, inplace=False)
(activation): Swish()
)
(conv_module): ConvolutionModule(
(pointwise_conv1): Conv1d(512, 1024, kernel_size=(1,), stride=(1,))
(depthwise_conv): Conv1d(512, 512, kernel_size=(15,), stride=(1,), padding=(7,), groups=512)
(norm): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(pointwise_conv2): Conv1d(512, 512, kernel_size=(1,), stride=(1,))
(activation): Swish()
)
(norm_ff): LayerNorm((512,), eps=1e-12, elementwise_affine=True)
(norm_mha): LayerNorm((512,), eps=1e-12, elementwise_affine=True)
(norm_ff_macaron): LayerNorm((512,), eps=1e-12, elementwise_affine=True)
(norm_conv): LayerNorm((512,), eps=1e-12, elementwise_affine=True)
(norm_final): LayerNorm((512,), eps=1e-12, elementwise_affine=True)
(dropout): Dropout(p=0.1, inplace=False)
)
(4): EncoderLayer(
(self_attn): RelPositionMultiHeadedAttention(
(linear_q): Linear(in_features=512, out_features=512, bias=True)
(linear_k): Linear(in_features=512, out_features=512, bias=True)
(linear_v): Linear(in_features=512, out_features=512, bias=True)
(linear_out): Linear(in_features=512, out_features=512, bias=True)
(dropout): Dropout(p=0.1, inplace=False)
(linear_pos): Linear(in_features=512, out_features=512, bias=False)
)
(feed_forward): PositionwiseFeedForward(
(w_1): Linear(in_features=512, out_features=2048, bias=True)
(w_2): Linear(in_features=2048, out_features=512, bias=True)
(dropout): Dropout(p=0.1, inplace=False)
(activation): Swish()
)
(feed_forward_macaron): PositionwiseFeedForward(
(w_1): Linear(in_features=512, out_features=2048, bias=True)
(w_2): Linear(in_features=2048, out_features=512, bias=True)
(dropout): Dropout(p=0.1, inplace=False)
(activation): Swish()
)
(conv_module): ConvolutionModule(
(pointwise_conv1): Conv1d(512, 1024, kernel_size=(1,), stride=(1,))
(depthwise_conv): Conv1d(512, 512, kernel_size=(15,), stride=(1,), padding=(7,), groups=512)
(norm): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(pointwise_conv2): Conv1d(512, 512, kernel_size=(1,), stride=(1,))
(activation): Swish()
)
(norm_ff): LayerNorm((512,), eps=1e-12, elementwise_affine=True)
(norm_mha): LayerNorm((512,), eps=1e-12, elementwise_affine=True)
(norm_ff_macaron): LayerNorm((512,), eps=1e-12, elementwise_affine=True)
(norm_conv): LayerNorm((512,), eps=1e-12, elementwise_affine=True)
(norm_final): LayerNorm((512,), eps=1e-12, elementwise_affine=True)
(dropout): Dropout(p=0.1, inplace=False)
)
(5): EncoderLayer(
(self_attn): RelPositionMultiHeadedAttention(
(linear_q): Linear(in_features=512, out_features=512, bias=True)
(linear_k): Linear(in_features=512, out_features=512, bias=True)
(linear_v): Linear(in_features=512, out_features=512, bias=True)
(linear_out): Linear(in_features=512, out_features=512, bias=True)
(dropout): Dropout(p=0.1, inplace=False)
(linear_pos): Linear(in_features=512, out_features=512, bias=False)
)
(feed_forward): PositionwiseFeedForward(
(w_1): Linear(in_features=512, out_features=2048, bias=True)
(w_2): Linear(in_features=2048, out_features=512, bias=True)
(dropout): Dropout(p=0.1, inplace=False)
(activation): Swish()
)
(feed_forward_macaron): PositionwiseFeedForward(
(w_1): Linear(in_features=512, out_features=2048, bias=True)
(w_2): Linear(in_features=2048, out_features=512, bias=True)
(dropout): Dropout(p=0.1, inplace=False)
(activation): Swish()
)
(conv_module): ConvolutionModule(
(pointwise_conv1): Conv1d(512, 1024, kernel_size=(1,), stride=(1,))
(depthwise_conv): Conv1d(512, 512, kernel_size=(15,), stride=(1,), padding=(7,), groups=512)
(norm): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(pointwise_conv2): Conv1d(512, 512, kernel_size=(1,), stride=(1,))
(activation): Swish()
)
(norm_ff): LayerNorm((512,), eps=1e-12, elementwise_affine=True)
(norm_mha): LayerNorm((512,), eps=1e-12, elementwise_affine=True)
(norm_ff_macaron): LayerNorm((512,), eps=1e-12, elementwise_affine=True)
(norm_conv): LayerNorm((512,), eps=1e-12, elementwise_affine=True)
(norm_final): LayerNorm((512,), eps=1e-12, elementwise_affine=True)
(dropout): Dropout(p=0.1, inplace=False)
)
(6): EncoderLayer(
(self_attn): RelPositionMultiHeadedAttention(
(linear_q): Linear(in_features=512, out_features=512, bias=True)
(linear_k): Linear(in_features=512, out_features=512, bias=True)
(linear_v): Linear(in_features=512, out_features=512, bias=True)
(linear_out): Linear(in_features=512, out_features=512, bias=True)
(dropout): Dropout(p=0.1, inplace=False)
(linear_pos): Linear(in_features=512, out_features=512, bias=False)
)
(feed_forward): PositionwiseFeedForward(
(w_1): Linear(in_features=512, out_features=2048, bias=True)
(w_2): Linear(in_features=2048, out_features=512, bias=True)
(dropout): Dropout(p=0.1, inplace=False)
(activation): Swish()
)
(feed_forward_macaron): PositionwiseFeedForward(
(w_1): Linear(in_features=512, out_features=2048, bias=True)
(w_2): Linear(in_features=2048, out_features=512, bias=True)
(dropout): Dropout(p=0.1, inplace=False)
(activation): Swish()
)
(conv_module): ConvolutionModule(
(pointwise_conv1): Conv1d(512, 1024, kernel_size=(1,), stride=(1,))
(depthwise_conv): Conv1d(512, 512, kernel_size=(15,), stride=(1,), padding=(7,), groups=512)
(norm): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(pointwise_conv2): Conv1d(512, 512, kernel_size=(1,), stride=(1,))
(activation): Swish()
)
(norm_ff): LayerNorm((512,), eps=1e-12, elementwise_affine=True)
(norm_mha): LayerNorm((512,), eps=1e-12, elementwise_affine=True)
(norm_ff_macaron): LayerNorm((512,), eps=1e-12, elementwise_affine=True)
(norm_conv): LayerNorm((512,), eps=1e-12, elementwise_affine=True)
(norm_final): LayerNorm((512,), eps=1e-12, elementwise_affine=True)
(dropout): Dropout(p=0.1, inplace=False)
)
(7): EncoderLayer(
(self_attn): RelPositionMultiHeadedAttention(
(linear_q): Linear(in_features=512, out_features=512, bias=True)
(linear_k): Linear(in_features=512, out_features=512, bias=True)
(linear_v): Linear(in_features=512, out_features=512, bias=True)
(linear_out): Linear(in_features=512, out_features=512, bias=True)
(dropout): Dropout(p=0.1, inplace=False)
(linear_pos): Linear(in_features=512, out_features=512, bias=False)
)
(feed_forward): PositionwiseFeedForward(
(w_1): Linear(in_features=512, out_features=2048, bias=True)
(w_2): Linear(in_features=2048, out_features=512, bias=True)
(dropout): Dropout(p=0.1, inplace=False)
(activation): Swish()
)
(feed_forward_macaron): PositionwiseFeedForward(
(w_1): Linear(in_features=512, out_features=2048, bias=True)
(w_2): Linear(in_features=2048, out_features=512, bias=True)
(dropout): Dropout(p=0.1, inplace=False)
(activation): Swish()
)
(conv_module): ConvolutionModule(
(pointwise_conv1): Conv1d(512, 1024, kernel_size=(1,), stride=(1,))
(depthwise_conv): Conv1d(512, 512, kernel_size=(15,), stride=(1,), padding=(7,), groups=512)
(norm): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(pointwise_conv2): Conv1d(512, 512, kernel_size=(1,), stride=(1,))
(activation): Swish()
)
(norm_ff): LayerNorm((512,), eps=1e-12, elementwise_affine=True)
(norm_mha): LayerNorm((512,), eps=1e-12, elementwise_affine=True)
(norm_ff_macaron): LayerNorm((512,), eps=1e-12, elementwise_affine=True)
(norm_conv): LayerNorm((512,), eps=1e-12, elementwise_affine=True)
(norm_final): LayerNorm((512,), eps=1e-12, elementwise_affine=True)
(dropout): Dropout(p=0.1, inplace=False)
)
(8): EncoderLayer(
(self_attn): RelPositionMultiHeadedAttention(
(linear_q): Linear(in_features=512, out_features=512, bias=True)
(linear_k): Linear(in_features=512, out_features=512, bias=True)
(linear_v): Linear(in_features=512, out_features=512, bias=True)
(linear_out): Linear(in_features=512, out_features=512, bias=True)
(dropout): Dropout(p=0.1, inplace=False)
(linear_pos): Linear(in_features=512, out_features=512, bias=False)
)
(feed_forward): PositionwiseFeedForward(
(w_1): Linear(in_features=512, out_features=2048, bias=True)
(w_2): Linear(in_features=2048, out_features=512, bias=True)
(dropout): Dropout(p=0.1, inplace=False)
(activation): Swish()
)
(feed_forward_macaron): PositionwiseFeedForward(
(w_1): Linear(in_features=512, out_features=2048, bias=True)
(w_2): Linear(in_features=2048, out_features=512, bias=True)
(dropout): Dropout(p=0.1, inplace=False)
(activation): Swish()
)
(conv_module): ConvolutionModule(
(pointwise_conv1): Conv1d(512, 1024, kernel_size=(1,), stride=(1,))
(depthwise_conv): Conv1d(512, 512, kernel_size=(15,), stride=(1,), padding=(7,), groups=512)
(norm): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(pointwise_conv2): Conv1d(512, 512, kernel_size=(1,), stride=(1,))
(activation): Swish()
)
(norm_ff): LayerNorm((512,), eps=1e-12, elementwise_affine=True)
(norm_mha): LayerNorm((512,), eps=1e-12, elementwise_affine=True)
(norm_ff_macaron): LayerNorm((512,), eps=1e-12, elementwise_affine=True)
(norm_conv): LayerNorm((512,), eps=1e-12, elementwise_affine=True)
(norm_final): LayerNorm((512,), eps=1e-12, elementwise_affine=True)
(dropout): Dropout(p=0.1, inplace=False)
)
(9): EncoderLayer(
(self_attn): RelPositionMultiHeadedAttention(
(linear_q): Linear(in_features=512, out_features=512, bias=True)
(linear_k): Linear(in_features=512, out_features=512, bias=True)
(linear_v): Linear(in_features=512, out_features=512, bias=True)
(linear_out): Linear(in_features=512, out_features=512, bias=True)
(dropout): Dropout(p=0.1, inplace=False)
(linear_pos): Linear(in_features=512, out_features=512, bias=False)
)
(feed_forward): PositionwiseFeedForward(
(w_1): Linear(in_features=512, out_features=2048, bias=True)
(w_2): Linear(in_features=2048, out_features=512, bias=True)
(dropout): Dropout(p=0.1, inplace=False)
(activation): Swish()
)
(feed_forward_macaron): PositionwiseFeedForward(
(w_1): Linear(in_features=512, out_features=2048, bias=True)
(w_2): Linear(in_features=2048, out_features=512, bias=True)
(dropout): Dropout(p=0.1, inplace=False)
(activation): Swish()
)
(conv_module): ConvolutionModule(
(pointwise_conv1): Conv1d(512, 1024, kernel_size=(1,), stride=(1,))
(depthwise_conv): Conv1d(512, 512, kernel_size=(15,), stride=(1,), padding=(7,), groups=512)
(norm): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(pointwise_conv2): Conv1d(512, 512, kernel_size=(1,), stride=(1,))
(activation): Swish()
)
(norm_ff): LayerNorm((512,), eps=1e-12, elementwise_affine=True)
(norm_mha): LayerNorm((512,), eps=1e-12, elementwise_affine=True)
(norm_ff_macaron): LayerNorm((512,), eps=1e-12, elementwise_affine=True)
(norm_conv): LayerNorm((512,), eps=1e-12, elementwise_affine=True)
(norm_final): LayerNorm((512,), eps=1e-12, elementwise_affine=True)
(dropout): Dropout(p=0.1, inplace=False)
)
(10): EncoderLayer(
(self_attn): RelPositionMultiHeadedAttention(
(linear_q): Linear(in_features=512, out_features=512, bias=True)
(linear_k): Linear(in_features=512, out_features=512, bias=True)
(linear_v): Linear(in_features=512, out_features=512, bias=True)
(linear_out): Linear(in_features=512, out_features=512, bias=True)
(dropout): Dropout(p=0.1, inplace=False)
(linear_pos): Linear(in_features=512, out_features=512, bias=False)
)
(feed_forward): PositionwiseFeedForward(
(w_1): Linear(in_features=512, out_features=2048, bias=True)
(w_2): Linear(in_features=2048, out_features=512, bias=True)
(dropout): Dropout(p=0.1, inplace=False)
(activation): Swish()
)
(feed_forward_macaron): PositionwiseFeedForward(
(w_1): Linear(in_features=512, out_features=2048, bias=True)
(w_2): Linear(in_features=2048, out_features=512, bias=True)
(dropout): Dropout(p=0.1, inplace=False)
(activation): Swish()
)
(conv_module): ConvolutionModule(
(pointwise_conv1): Conv1d(512, 1024, kernel_size=(1,), stride=(1,))
(depthwise_conv): Conv1d(512, 512, kernel_size=(15,), stride=(1,), padding=(7,), groups=512)
(norm): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(pointwise_conv2): Conv1d(512, 512, kernel_size=(1,), stride=(1,))
(activation): Swish()
)
(norm_ff): LayerNorm((512,), eps=1e-12, elementwise_affine=True)
(norm_mha): LayerNorm((512,), eps=1e-12, elementwise_affine=True)
(norm_ff_macaron): LayerNorm((512,), eps=1e-12, elementwise_affine=True)
(norm_conv): LayerNorm((512,), eps=1e-12, elementwise_affine=True)
(norm_final): LayerNorm((512,), eps=1e-12, elementwise_affine=True)
(dropout): Dropout(p=0.1, inplace=False)
)
(11): EncoderLayer(
(self_attn): RelPositionMultiHeadedAttention(
(linear_q): Linear(in_features=512, out_features=512, bias=True)
(linear_k): Linear(in_features=512, out_features=512, bias=True)
(linear_v): Linear(in_features=512, out_features=512, bias=True)
(linear_out): Linear(in_features=512, out_features=512, bias=True)
(dropout): Dropout(p=0.1, inplace=False)
(linear_pos): Linear(in_features=512, out_features=512, bias=False)
)
(feed_forward): PositionwiseFeedForward(
(w_1): Linear(in_features=512, out_features=2048, bias=True)
(w_2): Linear(in_features=2048, out_features=512, bias=True)
(dropout): Dropout(p=0.1, inplace=False)
(activation): Swish()
)
(feed_forward_macaron): PositionwiseFeedForward(
(w_1): Linear(in_features=512, out_features=2048, bias=True)
(w_2): Linear(in_features=2048, out_features=512, bias=True)
(dropout): Dropout(p=0.1, inplace=False)
(activation): Swish()
)
(conv_module): ConvolutionModule(
(pointwise_conv1): Conv1d(512, 1024, kernel_size=(1,), stride=(1,))
(depthwise_conv): Conv1d(512, 512, kernel_size=(15,), stride=(1,), padding=(7,), groups=512)
(norm): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(pointwise_conv2): Conv1d(512, 512, kernel_size=(1,), stride=(1,))
(activation): Swish()
)
(norm_ff): LayerNorm((512,), eps=1e-12, elementwise_affine=True)
(norm_mha): LayerNorm((512,), eps=1e-12, elementwise_affine=True)
(norm_ff_macaron): LayerNorm((512,), eps=1e-12, elementwise_affine=True)
(norm_conv): LayerNorm((512,), eps=1e-12, elementwise_affine=True)
(norm_final): LayerNorm((512,), eps=1e-12, elementwise_affine=True)
(dropout): Dropout(p=0.1, inplace=False)
)
)
(after_norm): LayerNorm((512,), eps=1e-12, elementwise_affine=True)
)
(decoder): TransducerDecoder(
(embed): Embedding(600, 512, padding_idx=0)
(dropout_embed): Dropout(p=0.2, inplace=False)
(decoder): ModuleList(
(0): LSTM(512, 512, batch_first=True)
)
(dropout_dec): ModuleList(
(0): Dropout(p=0.1, inplace=False)
)
)
(joint_network): JointNetwork(
(lin_enc): Linear(in_features=512, out_features=640, bias=True)
(lin_dec): Linear(in_features=512, out_features=640, bias=True)
(lin_out): Linear(in_features=640, out_features=600, bias=True)
(joint_activation): Tanh()
)
(criterion_transducer): RNNTLoss()
(ctc): CTC(
(ctc_lo): Linear(in_features=512, out_features=600, bias=True)
(ctc_loss): CTCLoss()
)
)
Model summary:
Class Name: ESPnetASRModel
Total Number of model parameters: 86.89 M
Number of trainable parameters: 86.89 M (100.0%)
Size: 347.56 MB
Type: torch.float32
[gpu11] 2022-06-07 21:56:00,414 (abs_task:1153) INFO: Optimizer:
Adam (
Parameter Group 0
amsgrad: False
betas: (0.9, 0.999)
eps: 1e-08
initial_lr: 0.0008
lr: 3.2e-08
weight_decay: 1e-06
)
[gpu11] 2022-06-07 21:56:00,414 (abs_task:1154) INFO: Scheduler: WarmupLR(warmup_steps=25000)
[gpu11] 2022-06-07 21:56:00,414 (abs_task:1163) INFO: Saving the configuration in exp/asr_IndEng188h-rnnt-600bpe/config.yaml
[gpu11] 2022-06-07 21:56:01,378 (abs_task:1517) INFO: [train] dataset:
ESPnetDataset(
speech: {"path": "dump/raw/train_English/wav.scp", "type": "sound"}
text: {"path": "dump/raw/train_English/text", "type": "text"}
preprocess: <espnet2.train.preprocessor.CommonPreprocessor object at 0x7f01c7bf4bb0>)
[gpu11] 2022-06-07 21:56:01,378 (abs_task:1518) INFO: [train] Batch sampler: FoldedBatchSampler(N-batch=9503, batch_size=20, shape_files=['exp/asr_stats_raw_en_bpe600/train/speech_shape', 'exp/asr_stats_raw_en_bpe600/train/text_shape.bpe'], sort_in_batch=descending, sort_batch=descending)
[gpu11] 2022-06-07 21:56:01,380 (abs_task:1519) INFO: [train] mini-batch sizes summary: N-batch=9503, mean=11.6, min=2, max=20
[gpu11] 2022-06-07 21:56:01,426 (abs_task:1517) INFO: [valid] dataset:
ESPnetDataset(
speech: {"path": "dump/raw/dev_English/wav.scp", "type": "sound"}
text: {"path": "dump/raw/dev_English/text", "type": "text"}
preprocess: <espnet2.train.preprocessor.CommonPreprocessor object at 0x7f01cb2f0d00>)
[gpu11] 2022-06-07 21:56:01,426 (abs_task:1518) INFO: [valid] Batch sampler: FoldedBatchSampler(N-batch=281, batch_size=20, shape_files=['exp/asr_stats_raw_en_bpe600/valid/speech_shape', 'exp/asr_stats_raw_en_bpe600/valid/text_shape.bpe'], sort_in_batch=descending, sort_batch=descending)
[gpu11] 2022-06-07 21:56:01,426 (abs_task:1519) INFO: [valid] mini-batch sizes summary: N-batch=281, mean=10.7, min=4, max=20
[gpu11] 2022-06-07 21:56:01,545 (trainer:274) INFO: 1/20epoch started
/speech/umeshs/espnet-v.202205/tools/anaconda/envs/espnet/lib/python3.8/site-packages/torch/_tensor.py:575: UserWarning: floor_divide is deprecated, and will be removed in a future version of pytorch. It currently rounds toward 0 (like the 'trunc' function NOT 'floor'). This results in incorrect rounding for negative values.
To keep the current behavior, use torch.div(a, b, rounding_mode='trunc'), or for actual floor division, use torch.div(a, b, rounding_mode='floor'). (Triggered internally at /opt/conda/conda-bld/pytorch_1631630839582/work/aten/src/ATen/native/BinaryOps.cpp:467.)
return torch.floor_divide(self, other)
[gpu11] 2022-06-07 21:57:07,786 (trainer:672) INFO: 1epoch:train:1-475batch: iter_time=6.790e-04, forward_time=0.050, loss_ctc=680.298, loss_transducer=657.364, loss=215.363, backward_time=0.045, optim_step_time=0.033, optim0_lr0=1.936e-06, train_time=0.558
[gpu11] 2022-06-07 21:58:13,987 (trainer:672) INFO: 1epoch:train:476-950batch: iter_time=1.740e-04, forward_time=0.050, loss_ctc=208.175, loss_transducer=205.430, loss=66.971, backward_time=0.046, optim_step_time=0.032, optim0_lr0=5.728e-06, train_time=0.557
[gpu11] 2022-06-07 21:59:20,597 (trainer:672) INFO: 1epoch:train:951-1425batch: iter_time=1.660e-04, forward_time=0.050, loss_ctc=142.299, loss_transducer=141.642, loss=46.083, backward_time=0.046, optim_step_time=0.033, optim0_lr0=9.536e-06, train_time=0.561
[gpu11] 2022-06-07 22:00:26,179 (trainer:672) INFO: 1epoch:train:1426-1900batch: iter_time=1.710e-04, forward_time=0.049, loss_ctc=133.894, loss_transducer=133.348, loss=43.379, backward_time=0.048, optim_step_time=0.033, optim0_lr0=1.334e-05, train_time=0.552
[gpu11] 2022-06-07 22:01:32,745 (trainer:672) INFO: 1epoch:train:1901-2375batch: iter_time=1.770e-04, forward_time=0.050, loss_ctc=136.295, loss_transducer=136.006, loss=44.224, backward_time=0.047, optim_step_time=0.033, optim0_lr0=1.714e-05, train_time=0.561
[gpu11] 2022-06-07 22:02:38,509 (trainer:672) INFO: 1epoch:train:2376-2850batch: iter_time=1.959e-04, forward_time=0.049, loss_ctc=133.174, loss_transducer=132.673, loss=43.156, backward_time=0.045, optim_step_time=0.033, optim0_lr0=2.093e-05, train_time=0.554
[gpu11] 2022-06-07 22:03:44,849 (trainer:672) INFO: 1epoch:train:2851-3325batch: iter_time=1.881e-04, forward_time=0.050, loss_ctc=135.935, loss_transducer=134.835, loss=43.904, backward_time=0.045, optim_step_time=0.033, optim0_lr0=2.474e-05, train_time=0.558
[gpu11] 2022-06-07 22:04:51,474 (trainer:672) INFO: 1epoch:train:3326-3800batch: iter_time=1.888e-04, forward_time=0.050, loss_ctc=132.813, loss_transducer=130.363, loss=42.552, backward_time=0.045, optim_step_time=0.033, optim0_lr0=2.854e-05, train_time=0.560
[gpu11] 2022-06-07 22:05:57,254 (trainer:672) INFO: 1epoch:train:3801-4275batch: iter_time=1.818e-04, forward_time=0.050, loss_ctc=126.542, loss_transducer=122.214, loss=40.044, backward_time=0.045, optim_step_time=0.032, optim0_lr0=3.234e-05, train_time=0.554
[gpu11] 2022-06-07 22:07:02,504 (trainer:672) INFO: 1epoch:train:4276-4750batch: iter_time=1.773e-04, forward_time=0.049, loss_ctc=132.448, loss_transducer=125.423, loss=41.289, backward_time=0.045, optim_step_time=0.032, optim0_lr0=3.613e-05, train_time=0.550
[gpu11] 2022-06-07 22:08:08,082 (trainer:672) INFO: 1epoch:train:4751-5225batch: iter_time=1.836e-04, forward_time=0.049, loss_ctc=129.755, loss_transducer=119.817, loss=39.686, backward_time=0.045, optim_step_time=0.032, optim0_lr0=3.994e-05, train_time=0.552
[gpu11] 2022-06-07 22:09:13,970 (trainer:672) INFO: 1epoch:train:5226-5700batch: iter_time=1.602e-04, forward_time=0.050, loss_ctc=123.927, loss_transducer=112.388, loss=37.391, backward_time=0.047, optim_step_time=0.033, optim0_lr0=4.374e-05, train_time=0.554
[gpu11] 2022-06-07 22:10:20,616 (trainer:672) INFO: 1epoch:train:5701-6175batch: iter_time=1.622e-04, forward_time=0.050, loss_ctc=133.980, loss_transducer=119.720, loss=39.978, backward_time=0.047, optim_step_time=0.033, optim0_lr0=4.754e-05, train_time=0.562
[gpu11] 2022-06-07 22:11:26,831 (trainer:672) INFO: 1epoch:train:6176-6650batch: iter_time=1.692e-04, forward_time=0.050, loss_ctc=132.316, loss_transducer=116.300, loss=38.999, backward_time=0.046, optim_step_time=0.033, optim0_lr0=5.133e-05, train_time=0.557
[gpu11] 2022-06-07 22:12:32,350 (trainer:672) INFO: 1epoch:train:6651-7125batch: iter_time=1.876e-04, forward_time=0.049, loss_ctc=125.560, loss_transducer=108.212, loss=36.470, backward_time=0.045, optim_step_time=0.033, optim0_lr0=5.514e-05, train_time=0.552
[gpu11] 2022-06-07 22:13:38,596 (trainer:672) INFO: 1epoch:train:7126-7600batch: iter_time=1.684e-04, forward_time=0.050, loss_ctc=130.631, loss_transducer=111.212, loss=37.600, backward_time=0.046, optim_step_time=0.033, optim0_lr0=5.894e-05, train_time=0.557
[gpu11] 2022-06-07 22:14:44,695 (trainer:672) INFO: 1epoch:train:7601-8075batch: iter_time=1.649e-04, forward_time=0.050, loss_ctc=129.327, loss_transducer=109.073, loss=36.968, backward_time=0.047, optim_step_time=0.033, optim0_lr0=6.274e-05, train_time=0.557
[gpu11] 2022-06-07 22:15:51,081 (trainer:672) INFO: 1epoch:train:8076-8550batch: iter_time=1.688e-04, forward_time=0.050, loss_ctc=130.747, loss_transducer=108.773, loss=36.999, backward_time=0.047, optim_step_time=0.033, optim0_lr0=6.653e-05, train_time=0.559
[gpu11] 2022-06-07 22:16:56,466 (trainer:672) INFO: 1epoch:train:8551-9025batch: iter_time=1.636e-04, forward_time=0.050, loss_ctc=124.123, loss_transducer=102.484, loss=34.930, backward_time=0.047, optim_step_time=0.033, optim0_lr0=7.034e-05, train_time=0.550
[gpu11] 2022-06-07 22:18:03,425 (trainer:672) INFO: 1epoch:train:9026-9500batch: iter_time=1.657e-04, forward_time=0.050, loss_ctc=129.368, loss_transducer=106.397, loss=36.302, backward_time=0.047, optim_step_time=0.033, optim0_lr0=7.414e-05, train_time=0.563
/speech/umeshs/espnet-v.202205/espnet2/train/reporter.py:79: UserWarning: No valid stats found
warnings.warn("No valid stats found")
[gpu11] 2022-06-07 22:18:18,714 (trainer:328) INFO: 1epoch results: [train] iter_time=1.996e-04, forward_time=0.050, loss_ctc=162.002, loss_transducer=151.131, loss=49.933, backward_time=0.046, optim_step_time=0.033, optim0_lr0=3.805e-05, train_time=0.556, time=22 minutes and 2.64 seconds, total_count=9503, gpu_max_cached_mem_GB=15.090, [valid] loss_ctc=153.939, cer_ctc=0.952, loss_transducer=127.366, cer_transducer=nan, wer_transducer=nan, loss=173.547, time=14.51 seconds, total_count=281, gpu_max_cached_mem_GB=15.090
[gpu11] 2022-06-07 22:18:21,561 (trainer:376) INFO: The best model has been updated: valid.loss
[gpu11] 2022-06-07 22:18:21,561 (trainer:262) INFO: 2/20epoch started. Estimated time to finish: 7 hours, 4 minutes and 20.31 seconds
[gpu11] 2022-06-07 22:19:29,062 (trainer:672) INFO: 2epoch:train:1-475batch: iter_time=8.280e-04, forward_time=0.051, loss_ctc=126.170, loss_transducer=102.831, loss=35.171, backward_time=0.046, optim_step_time=0.033, optim0_lr0=7.794e-05, train_time=0.568
[gpu11] 2022-06-07 22:20:36,168 (trainer:672) INFO: 2epoch:train:476-950batch: iter_time=1.698e-04, forward_time=0.050, loss_ctc=124.096, loss_transducer=100.651, loss=34.470, backward_time=0.047, optim_step_time=0.033, optim0_lr0=8.173e-05, train_time=0.565
[gpu11] 2022-06-07 22:21:42,023 (trainer:672) INFO: 2epoch:train:951-1425batch: iter_time=1.824e-04, forward_time=0.050, loss_ctc=121.342, loss_transducer=97.254, loss=33.414, backward_time=0.046, optim_step_time=0.033, optim0_lr0=8.554e-05, train_time=0.555
[gpu11] 2022-06-07 22:22:48,259 (trainer:672) INFO: 2epoch:train:1426-1900batch: iter_time=1.748e-04, forward_time=0.050, loss_ctc=122.882, loss_transducer=97.726, loss=33.648, backward_time=0.046, optim_step_time=0.033, optim0_lr0=8.934e-05, train_time=0.557
[gpu11] 2022-06-07 22:23:54,168 (trainer:672) INFO: 2epoch:train:1901-2375batch: iter_time=1.768e-04, forward_time=0.049, loss_ctc=127.216, loss_transducer=100.810, loss=34.744, backward_time=0.045, optim_step_time=0.032, optim0_lr0=9.314e-05, train_time=0.555
[gpu11] 2022-06-07 22:24:59,894 (trainer:672) INFO: 2epoch:train:2376-2850batch: iter_time=1.568e-04, forward_time=0.049, loss_ctc=123.229, loss_transducer=96.728, loss=33.424, backward_time=0.046, optim_step_time=0.033, optim0_lr0=9.693e-05, train_time=0.553
[gpu11] 2022-06-07 22:26:06,012 (trainer:672) INFO: 2epoch:train:2851-3325batch: iter_time=1.506e-04, forward_time=0.050, loss_ctc=119.494, loss_transducer=92.596, loss=32.111, backward_time=0.046, optim_step_time=0.033, optim0_lr0=1.007e-04, train_time=0.557
[gpu11] 2022-06-07 22:27:11,420 (trainer:672) INFO: 2epoch:train:3326-3800batch: iter_time=1.506e-04, forward_time=0.050, loss_ctc=118.976, loss_transducer=91.801, loss=31.874, backward_time=0.046, optim_step_time=0.033, optim0_lr0=1.045e-04, train_time=0.551
[gpu11] 2022-06-07 22:28:17,842 (trainer:672) INFO: 2epoch:train:3801-4275batch: iter_time=1.492e-04, forward_time=0.050, loss_ctc=122.885, loss_transducer=94.104, loss=32.742, backward_time=0.046, optim_step_time=0.033, optim0_lr0=1.083e-04, train_time=0.560
[gpu11] 2022-06-07 22:29:23,145 (trainer:672) INFO: 2epoch:train:4276-4750batch: iter_time=1.506e-04, forward_time=0.049, loss_ctc=113.831, loss_transducer=85.779, loss=29.982, backward_time=0.045, optim_step_time=0.032, optim0_lr0=1.121e-04, train_time=0.550
[gpu11] 2022-06-07 22:30:28,715 (trainer:672) INFO: 2epoch:train:4751-5225batch: iter_time=1.631e-04, forward_time=0.049, loss_ctc=115.010, loss_transducer=86.618, loss=30.280, backward_time=0.045, optim_step_time=0.032, optim0_lr0=1.159e-04, train_time=0.552
[gpu11] 2022-06-07 22:31:34,443 (trainer:672) INFO: 2epoch:train:5226-5700batch: iter_time=1.597e-04, forward_time=0.049, loss_ctc=108.844, loss_transducer=81.708, loss=28.590, backward_time=0.045, optim_step_time=0.032, optim0_lr0=1.197e-04, train_time=0.553
[gpu11] 2022-06-07 22:32:40,181 (trainer:672) INFO: 2epoch:train:5701-6175batch: iter_time=1.624e-04, forward_time=0.049, loss_ctc=109.327, loss_transducer=81.713, loss=28.628, backward_time=0.045, optim_step_time=0.032, optim0_lr0=1.235e-04, train_time=0.554
[gpu11] 2022-06-07 22:33:45,905 (trainer:672) INFO: 2epoch:train:6176-6650batch: iter_time=1.617e-04, forward_time=0.049, loss_ctc=100.044, loss_transducer=73.360, loss=25.843, backward_time=0.045, optim_step_time=0.032, optim0_lr0=1.273e-04, train_time=0.553
[gpu11] 2022-06-07 22:34:50,765 (trainer:672) INFO: 2epoch:train:6651-7125batch: iter_time=1.484e-04, forward_time=0.049, loss_ctc=99.149, loss_transducer=71.941, loss=25.421, backward_time=0.046, optim_step_time=0.033, optim0_lr0=1.311e-04, train_time=0.546
[gpu11] 2022-06-07 22:35:56,143 (trainer:672) INFO: 2epoch:train:7126-7600batch: iter_time=1.593e-04, forward_time=0.049, loss_ctc=98.112, loss_transducer=70.507, loss=24.985, backward_time=0.045, optim_step_time=0.033, optim0_lr0=1.349e-04, train_time=0.550
[gpu11] 2022-06-07 22:37:02,111 (trainer:672) INFO: 2epoch:train:7601-8075batch: iter_time=1.773e-04, forward_time=0.049, loss_ctc=94.347, loss_transducer=66.940, loss=23.811, backward_time=0.045, optim_step_time=0.032, optim0_lr0=1.387e-04, train_time=0.555
[gpu11] 2022-06-07 22:38:11,170 (trainer:672) INFO: 2epoch:train:8076-8550batch: iter_time=2.022e-04, forward_time=0.053, loss_ctc=96.478, loss_transducer=68.280, loss=24.306, backward_time=0.051, optim_step_time=0.036, optim0_lr0=1.425e-04, train_time=0.582
[gpu11] 2022-06-07 22:39:16,395 (trainer:672) INFO: 2epoch:train:8551-9025batch: iter_time=1.465e-04, forward_time=0.049, loss_ctc=95.508, loss_transducer=67.386, loss=24.009, backward_time=0.046, optim_step_time=0.033, optim0_lr0=1.463e-04, train_time=0.549
[gpu11] 2022-06-07 22:40:21,969 (trainer:672) INFO: 2epoch:train:9026-9500batch: iter_time=1.417e-04, forward_time=0.049, loss_ctc=91.648, loss_transducer=63.767, loss=22.815, backward_time=0.046, optim_step_time=0.032, optim0_lr0=1.501e-04, train_time=0.552
/speech/umeshs/espnet-v.202205/espnet2/train/reporter.py:79: UserWarning: No valid stats found
warnings.warn("No valid stats found")
[gpu11] 2022-06-07 22:40:37,267 (trainer:328) INFO: 2epoch results: [train] iter_time=1.956e-04, forward_time=0.050, loss_ctc=111.431, loss_transducer=84.640, loss=29.517, backward_time=0.046, optim_step_time=0.033, optim0_lr0=1.140e-04, train_time=0.556, time=22 minutes and 1.13 seconds, total_count=19006, gpu_max_cached_mem_GB=15.090, [valid] loss_ctc=96.253, cer_ctc=0.560, loss_transducer=65.827, cer_transducer=nan, wer_transducer=nan, loss=94.703, time=14.57 seconds, total_count=562, gpu_max_cached_mem_GB=15.090
[gpu11] 2022-06-07 22:40:40,121 (trainer:376) INFO: The best model has been updated: valid.loss
[gpu11] 2022-06-07 22:40:40,122 (trainer:262) INFO: 3/20epoch started. Estimated time to finish: 6 hours, 41 minutes and 47.19 seconds
[gpu11] 2022-06-07 22:41:47,062 (trainer:672) INFO: 3epoch:train:1-475batch: iter_time=0.001, forward_time=0.050, loss_ctc=87.985, loss_transducer=60.419, loss=21.704, backward_time=0.046, optim_step_time=0.033, optim0_lr0=1.539e-04, train_time=0.564
[gpu11] 2022-06-07 22:42:51,959 (trainer:672) INFO: 3epoch:train:476-950batch: iter_time=1.762e-04, forward_time=0.049, loss_ctc=83.883, loss_transducer=57.352, loss=20.629, backward_time=0.045, optim_step_time=0.033, optim0_lr0=1.577e-04, train_time=0.546
[gpu11] 2022-06-07 22:43:58,731 (trainer:672) INFO: 3epoch:train:951-1425batch: iter_time=1.745e-04, forward_time=0.050, loss_ctc=86.103, loss_transducer=58.180, loss=21.003, backward_time=0.046, optim_step_time=0.033, optim0_lr0=1.615e-04, train_time=0.562
[gpu11] 2022-06-07 22:45:04,263 (trainer:672) INFO: 3epoch:train:1426-1900batch: iter_time=1.547e-04, forward_time=0.050, loss_ctc=83.835, loss_transducer=55.871, loss=20.255, backward_time=0.046, optim_step_time=0.033, optim0_lr0=1.653e-04, train_time=0.552
[gpu11] 2022-06-07 22:46:10,849 (trainer:672) INFO: 3epoch:train:1901-2375batch: iter_time=1.547e-04, forward_time=0.050, loss_ctc=82.549, loss_transducer=54.874, loss=19.910, backward_time=0.046, optim_step_time=0.033, optim0_lr0=1.691e-04, train_time=0.561
[gpu11] 2022-06-07 22:47:16,396 (trainer:672) INFO: 3epoch:train:2376-2850batch: iter_time=1.528e-04, forward_time=0.050, loss_ctc=80.590, loss_transducer=52.958, loss=19.284, backward_time=0.046, optim_step_time=0.033, optim0_lr0=1.729e-04, train_time=0.551
[gpu11] 2022-06-07 22:48:22,027 (trainer:672) INFO: 3epoch:train:2851-3325batch: iter_time=1.623e-04, forward_time=0.049, loss_ctc=78.539, loss_transducer=50.984, loss=18.636, backward_time=0.046, optim_step_time=0.033, optim0_lr0=1.767e-04, train_time=0.552
[gpu11] 2022-06-07 22:49:28,411 (trainer:672) INFO: 3epoch:train:3326-3800batch: iter_time=1.539e-04, forward_time=0.050, loss_ctc=76.654, loss_transducer=49.105, loss=18.025, backward_time=0.046, optim_step_time=0.033, optim0_lr0=1.805e-04, train_time=0.558
[gpu11] 2022-06-07 22:50:33,811 (trainer:672) INFO: 3epoch:train:3801-4275batch: iter_time=1.527e-04, forward_time=0.049, loss_ctc=73.174, loss_transducer=46.573, loss=17.131, backward_time=0.046, optim_step_time=0.033, optim0_lr0=1.843e-04, train_time=0.551
[gpu11] 2022-06-07 22:51:39,139 (trainer:672) INFO: 3epoch:train:4276-4750batch: iter_time=1.625e-04, forward_time=0.049, loss_ctc=72.873, loss_transducer=45.990, loss=16.963, backward_time=0.045, optim_step_time=0.032, optim0_lr0=1.881e-04, train_time=0.550
[gpu11] 2022-06-07 22:52:44,941 (trainer:672) INFO: 3epoch:train:4751-5225batch: iter_time=1.620e-04, forward_time=0.049, loss_ctc=73.446, loss_transducer=45.945, loss=16.995, backward_time=0.045, optim_step_time=0.032, optim0_lr0=1.919e-04, train_time=0.554
[gpu11] 2022-06-07 22:53:50,818 (trainer:672) INFO: 3epoch:train:5226-5700batch: iter_time=1.632e-04, forward_time=0.049, loss_ctc=72.173, loss_transducer=44.983, loss=16.659, backward_time=0.045, optim_step_time=0.032, optim0_lr0=1.957e-04, train_time=0.554
[gpu11] 2022-06-07 22:54:56,292 (trainer:672) INFO: 3epoch:train:5701-6175batch: iter_time=1.591e-04, forward_time=0.049, loss_ctc=75.031, loss_transducer=46.494, loss=17.251, backward_time=0.045, optim_step_time=0.032, optim0_lr0=1.995e-04, train_time=0.551
[gpu11] 2022-06-07 22:56:02,716 (trainer:672) INFO: 3epoch:train:6176-6650batch: iter_time=1.593e-04, forward_time=0.049, loss_ctc=70.215, loss_transducer=43.101, loss=16.041, backward_time=0.045, optim_step_time=0.032, optim0_lr0=2.033e-04, train_time=0.559
[gpu11] 2022-06-07 22:57:08,017 (trainer:672) INFO: 3epoch:train:6651-7125batch: iter_time=1.624e-04, forward_time=0.049, loss_ctc=71.864, loss_transducer=43.742, loss=16.325, backward_time=0.045, optim_step_time=0.032, optim0_lr0=2.071e-04, train_time=0.550
[gpu11] 2022-06-07 22:58:13,236 (trainer:672) INFO: 3epoch:train:7126-7600batch: iter_time=1.528e-04, forward_time=0.049, loss_ctc=69.705, loss_transducer=42.607, loss=15.880, backward_time=0.045, optim_step_time=0.032, optim0_lr0=2.109e-04, train_time=0.549
[gpu11] 2022-06-07 22:59:19,120 (trainer:672) INFO: 3epoch:train:7601-8075batch: iter_time=1.557e-04, forward_time=0.049, loss_ctc=69.443, loss_transducer=41.776, loss=15.652, backward_time=0.045, optim_step_time=0.032, optim0_lr0=2.147e-04, train_time=0.555
[gpu11] 2022-06-07 23:00:25,443 (trainer:672) INFO: 3epoch:train:8076-8550batch: iter_time=1.574e-04, forward_time=0.050, loss_ctc=69.231, loss_transducer=41.589, loss=15.589, backward_time=0.046, optim_step_time=0.033, optim0_lr0=2.185e-04, train_time=0.558
[gpu11] 2022-06-07 23:01:30,895 (trainer:672) INFO: 3epoch:train:8551-9025batch: iter_time=1.498e-04, forward_time=0.049, loss_ctc=65.331, loss_transducer=38.743, loss=14.586, backward_time=0.045, optim_step_time=0.032, optim0_lr0=2.223e-04, train_time=0.551
[gpu11] 2022-06-07 23:02:36,607 (trainer:672) INFO: 3epoch:train:9026-9500batch: iter_time=1.574e-04, forward_time=0.049, loss_ctc=66.075, loss_transducer=38.837, loss=14.665, backward_time=0.045, optim_step_time=0.032, optim0_lr0=2.261e-04, train_time=0.553
[gpu11] 2022-06-07 23:02:51,921 (trainer:328) INFO: 3epoch results: [train] iter_time=2.155e-04, forward_time=0.049, loss_ctc=75.382, loss_transducer=47.965, loss=17.645, backward_time=0.045, optim_step_time=0.032, optim0_lr0=1.900e-04, train_time=0.554, time=21 minutes and 57.23 seconds, total_count=28509, gpu_max_cached_mem_GB=15.090, [valid] loss_ctc=69.044, cer_ctc=0.376, loss_transducer=37.931, cer_transducer=nan, wer_transducer=nan, loss=58.644, time=14.56 seconds, total_count=843, gpu_max_cached_mem_GB=15.090
[gpu11] 2022-06-07 23:02:54,763 (trainer:376) INFO: The best model has been updated: valid.loss
[gpu11] 2022-06-07 23:02:54,763 (trainer:262) INFO: 4/20epoch started. Estimated time to finish: 6 hours, 19 minutes and 1.57 seconds
[gpu11] 2022-06-07 23:03:59,742 (trainer:672) INFO: 4epoch:train:1-475batch: iter_time=0.001, forward_time=0.049, loss_ctc=64.857, loss_transducer=37.793, loss=14.313, backward_time=0.045, optim_step_time=0.033, optim0_lr0=2.299e-04, train_time=0.547
[gpu11] 2022-06-07 23:05:06,614 (trainer:672) INFO: 4epoch:train:476-950batch: iter_time=1.813e-04, forward_time=0.050, loss_ctc=68.917, loss_transducer=40.226, loss=15.225, backward_time=0.046, optim_step_time=0.033, optim0_lr0=2.337e-04, train_time=0.563
[gpu11] 2022-06-07 23:06:12,520 (trainer:672) INFO: 4epoch:train:951-1425batch: iter_time=1.891e-04, forward_time=0.049, loss_ctc=65.246, loss_transducer=37.669, loss=14.311, backward_time=0.045, optim_step_time=0.032, optim0_lr0=2.375e-04, train_time=0.555
[gpu11] 2022-06-07 23:07:18,288 (trainer:672) INFO: 4epoch:train:1426-1900batch: iter_time=1.841e-04, forward_time=0.050, loss_ctc=63.787, loss_transducer=36.502, loss=13.909, backward_time=0.045, optim_step_time=0.033, optim0_lr0=2.413e-04, train_time=0.553
[gpu11] 2022-06-07 23:08:23,389 (trainer:672) INFO: 4epoch:train:1901-2375batch: iter_time=1.565e-04, forward_time=0.048, loss_ctc=59.822, loss_transducer=34.130, loss=13.019, backward_time=0.044, optim_step_time=0.032, optim0_lr0=2.451e-04, train_time=0.549
[gpu11] 2022-06-07 23:09:28,246 (trainer:672) INFO: 4epoch:train:2376-2850batch: iter_time=1.552e-04, forward_time=0.048, loss_ctc=62.022, loss_transducer=35.227, loss=13.458, backward_time=0.044, optim_step_time=0.033, optim0_lr0=2.489e-04, train_time=0.546
[gpu11] 2022-06-07 23:10:33,745 (trainer:672) INFO: 4epoch:train:2851-3325batch: iter_time=1.626e-04, forward_time=0.049, loss_ctc=65.708, loss_transducer=37.478, loss=14.298, backward_time=0.045, optim_step_time=0.033, optim0_lr0=2.527e-04, train_time=0.551
[gpu11] 2022-06-07 23:11:38,840 (trainer:672) INFO: 4epoch:train:3326-3800batch: iter_time=1.614e-04, forward_time=0.048, loss_ctc=59.705, loss_transducer=33.844, loss=12.939, backward_time=0.045, optim_step_time=0.032, optim0_lr0=2.565e-04, train_time=0.548
[gpu11] 2022-06-07 23:12:43,727 (trainer:672) INFO: 4epoch:train:3801-4275batch: iter_time=1.629e-04, forward_time=0.048, loss_ctc=61.194, loss_transducer=34.582, loss=13.235, backward_time=0.045, optim_step_time=0.032, optim0_lr0=2.603e-04, train_time=0.546
[gpu11] 2022-06-07 23:13:49,518 (trainer:672) INFO: 4epoch:train:4276-4750batch: iter_time=1.652e-04, forward_time=0.049, loss_ctc=62.370, loss_transducer=35.134, loss=13.461, backward_time=0.045, optim_step_time=0.033, optim0_lr0=2.641e-04, train_time=0.554
[gpu11] 2022-06-07 23:14:54,817 (trainer:672) INFO: 4epoch:train:4751-5225batch: iter_time=1.639e-04, forward_time=0.049, loss_ctc=58.503, loss_transducer=32.595, loss=12.536, backward_time=0.045, optim_step_time=0.033, optim0_lr0=2.679e-04, train_time=0.550
[gpu11] 2022-06-07 23:16:00,855 (trainer:672) INFO: 4epoch:train:5226-5700batch: iter_time=1.667e-04, forward_time=0.049, loss_ctc=59.324, loss_transducer=33.058, loss=12.714, backward_time=0.045, optim_step_time=0.032, optim0_lr0=2.717e-04, train_time=0.556
[gpu11] 2022-06-07 23:17:07,050 (trainer:672) INFO: 4epoch:train:5701-6175batch: iter_time=1.566e-04, forward_time=0.049, loss_ctc=61.526, loss_transducer=34.084, loss=13.135, backward_time=0.045, optim_step_time=0.033, optim0_lr0=2.755e-04, train_time=0.558
[gpu11] 2022-06-07 23:18:13,652 (trainer:672) INFO: 4epoch:train:6176-6650batch: iter_time=1.651e-04, forward_time=0.050, loss_ctc=62.524, loss_transducer=34.867, loss=13.406, backward_time=0.045, optim_step_time=0.032, optim0_lr0=2.793e-04, train_time=0.560
[gpu11] 2022-06-07 23:19:19,662 (trainer:672) INFO: 4epoch:train:6651-7125batch: iter_time=1.619e-04, forward_time=0.049, loss_ctc=62.735, loss_transducer=34.670, loss=13.373, backward_time=0.045, optim_step_time=0.032, optim0_lr0=2.831e-04, train_time=0.556
[gpu11] 2022-06-07 23:20:25,687 (trainer:672) INFO: 4epoch:train:7126-7600batch: iter_time=1.508e-04, forward_time=0.050, loss_ctc=60.248, loss_transducer=33.178, loss=12.813, backward_time=0.046, optim_step_time=0.032, optim0_lr0=2.869e-04, train_time=0.555
[gpu11] 2022-06-07 23:21:33,495 (trainer:672) INFO: 4epoch:train:7601-8075batch: iter_time=1.829e-04, forward_time=0.052, loss_ctc=56.094, loss_transducer=30.476, loss=11.826, backward_time=0.051, optim_step_time=0.033, optim0_lr0=2.907e-04, train_time=0.571
[gpu11] 2022-06-07 23:22:43,139 (trainer:672) INFO: 4epoch:train:8076-8550batch: iter_time=2.006e-04, forward_time=0.055, loss_ctc=59.182, loss_transducer=32.351, loss=12.526, backward_time=0.054, optim_step_time=0.034, optim0_lr0=2.945e-04, train_time=0.586
[gpu11] 2022-06-07 23:23:54,351 (trainer:672) INFO: 4epoch:train:8551-9025batch: iter_time=2.436e-04, forward_time=0.056, loss_ctc=59.690, loss_transducer=32.881, loss=12.697, backward_time=0.054, optim_step_time=0.034, optim0_lr0=2.983e-04, train_time=0.599
[gpu11] 2022-06-07 23:25:03,283 (trainer:672) INFO: 4epoch:train:9026-9500batch: iter_time=2.098e-04, forward_time=0.053, loss_ctc=58.713, loss_transducer=31.812, loss=12.357, backward_time=0.051, optim_step_time=0.033, optim0_lr0=3.021e-04, train_time=0.580
[gpu11] 2022-06-07 23:25:18,878 (trainer:328) INFO: 4epoch results: [train] iter_time=2.332e-04, forward_time=0.050, loss_ctc=61.576, loss_transducer=34.606, loss=13.270, backward_time=0.047, optim_step_time=0.033, optim0_lr0=2.660e-04, train_time=0.559, time=22 minutes and 9.32 seconds, total_count=38012, gpu_max_cached_mem_GB=15.090, [valid] loss_ctc=57.640, cer_ctc=0.330, loss_transducer=28.338, cer_transducer=nan, wer_transducer=nan, loss=45.630, time=14.8 seconds, total_count=1124, gpu_max_cached_mem_GB=15.090
[gpu11] 2022-06-07 23:25:21,752 (trainer:376) INFO: The best model has been updated: valid.loss
[gpu11] 2022-06-07 23:25:21,780 (trainer:430) INFO: The model files were removed: exp/asr_IndEng188h-rnnt-600bpe/1epoch.pth
[gpu11] 2022-06-07 23:25:21,781 (trainer:262) INFO: 5/20epoch started. Estimated time to finish: 5 hours, 57 minutes and 20.94 seconds
[gpu11] 2022-06-07 23:26:27,975 (trainer:672) INFO: 5epoch:train:1-475batch: iter_time=0.001, forward_time=0.050, loss_ctc=54.851, loss_transducer=29.154, loss=11.402, backward_time=0.048, optim_step_time=0.033, optim0_lr0=3.059e-04, train_time=0.558
[gpu11] 2022-06-07 23:27:34,256 (trainer:672) INFO: 5epoch:train:476-950batch: iter_time=1.726e-04, forward_time=0.049, loss_ctc=55.843, loss_transducer=29.907, loss=11.665, backward_time=0.048, optim_step_time=0.033, optim0_lr0=3.097e-04, train_time=0.558
[gpu11] 2022-06-07 23:28:40,009 (trainer:672) INFO: 5epoch:train:951-1425batch: iter_time=1.578e-04, forward_time=0.049, loss_ctc=55.918, loss_transducer=29.678, loss=11.613, backward_time=0.046, optim_step_time=0.033, optim0_lr0=3.135e-04, train_time=0.553
[gpu11] 2022-06-07 23:29:45,542 (trainer:672) INFO: 5epoch:train:1426-1900batch: iter_time=1.626e-04, forward_time=0.049, loss_ctc=57.645, loss_transducer=30.924, loss=12.054, backward_time=0.045, optim_step_time=0.032, optim0_lr0=3.173e-04, train_time=0.551
[gpu11] 2022-06-07 23:30:50,435 (trainer:672) INFO: 5epoch:train:1901-2375batch: iter_time=1.549e-04, forward_time=0.049, loss_ctc=54.817, loss_transducer=29.055, loss=11.375, backward_time=0.046, optim_step_time=0.033, optim0_lr0=3.211e-04, train_time=0.547
[gpu11] 2022-06-07 23:31:55,598 (trainer:672) INFO: 5epoch:train:2376-2850batch: iter_time=1.551e-04, forward_time=0.049, loss_ctc=54.809, loss_transducer=29.211, loss=11.414, backward_time=0.045, optim_step_time=0.033, optim0_lr0=3.249e-04, train_time=0.548
[gpu11] 2022-06-07 23:33:01,160 (trainer:672) INFO: 5epoch:train:2851-3325batch: iter_time=1.545e-04, forward_time=0.049, loss_ctc=55.232, loss_transducer=29.183, loss=11.438, backward_time=0.045, optim_step_time=0.033, optim0_lr0=3.287e-04, train_time=0.552
[gpu11] 2022-06-07 23:34:06,698 (trainer:672) INFO: 5epoch:train:3326-3800batch: iter_time=1.635e-04, forward_time=0.049, loss_ctc=55.171, loss_transducer=29.304, loss=11.464, backward_time=0.045, optim_step_time=0.032, optim0_lr0=3.325e-04, train_time=0.551
[gpu11] 2022-06-07 23:35:11,976 (trainer:672) INFO: 5epoch:train:3801-4275batch: iter_time=1.656e-04, forward_time=0.049, loss_ctc=56.420, loss_transducer=29.987, loss=11.728, backward_time=0.045, optim_step_time=0.032, optim0_lr0=3.363e-04, train_time=0.550
[gpu11] 2022-06-07 23:36:17,507 (trainer:672) INFO: 5epoch:train:4276-4750batch: iter_time=1.660e-04, forward_time=0.049, loss_ctc=55.220, loss_transducer=29.299, loss=11.466, backward_time=0.045, optim_step_time=0.032, optim0_lr0=3.401e-04, train_time=0.551
[gpu11] 2022-06-07 23:37:23,350 (trainer:672) INFO: 5epoch:train:4751-5225batch: iter_time=1.645e-04, forward_time=0.049, loss_ctc=53.912, loss_transducer=28.442, loss=11.154, backward_time=0.045, optim_step_time=0.032, optim0_lr0=3.439e-04, train_time=0.554
[gpu11] 2022-06-07 23:38:29,741 (trainer:672) INFO: 5epoch:train:5226-5700batch: iter_time=1.526e-04, forward_time=0.050, loss_ctc=56.780, loss_transducer=29.852, loss=11.721, backward_time=0.046, optim_step_time=0.032, optim0_lr0=3.477e-04, train_time=0.559
[gpu11] 2022-06-07 23:39:35,839 (trainer:672) INFO: 5epoch:train:5701-6175batch: iter_time=1.491e-04, forward_time=0.050, loss_ctc=54.826, loss_transducer=29.071, loss=11.380, backward_time=0.046, optim_step_time=0.033, optim0_lr0=3.515e-04, train_time=0.557
[gpu11] 2022-06-07 23:40:41,864 (trainer:672) INFO: 5epoch:train:6176-6650batch: iter_time=1.541e-04, forward_time=0.050, loss_ctc=55.094, loss_transducer=29.033, loss=11.390, backward_time=0.046, optim_step_time=0.033, optim0_lr0=3.553e-04, train_time=0.556
[gpu11] 2022-06-07 23:41:47,856 (trainer:672) INFO: 5epoch:train:6651-7125batch: iter_time=1.636e-04, forward_time=0.049, loss_ctc=55.268, loss_transducer=29.082, loss=11.416, backward_time=0.045, optim_step_time=0.032, optim0_lr0=3.591e-04, train_time=0.555
[gpu11] 2022-06-07 23:42:53,423 (trainer:672) INFO: 5epoch:train:7126-7600batch: iter_time=1.649e-04, forward_time=0.049, loss_ctc=53.965, loss_transducer=28.319, loss=11.127, backward_time=0.045, optim_step_time=0.032, optim0_lr0=3.629e-04, train_time=0.552
[gpu11] 2022-06-07 23:43:59,101 (trainer:672) INFO: 5epoch:train:7601-8075batch: iter_time=1.637e-04, forward_time=0.049, loss_ctc=53.265, loss_transducer=27.982, loss=10.990, backward_time=0.045, optim_step_time=0.032, optim0_lr0=3.667e-04, train_time=0.553
[gpu11] 2022-06-07 23:45:04,186 (trainer:672) INFO: 5epoch:train:8076-8550batch: iter_time=1.640e-04, forward_time=0.049, loss_ctc=52.633, loss_transducer=27.488, loss=10.819, backward_time=0.044, optim_step_time=0.032, optim0_lr0=3.705e-04, train_time=0.548
[gpu11] 2022-06-07 23:46:10,113 (trainer:672) INFO: 5epoch:train:8551-9025batch: iter_time=1.631e-04, forward_time=0.049, loss_ctc=52.995, loss_transducer=27.462, loss=10.840, backward_time=0.045, optim_step_time=0.032, optim0_lr0=3.743e-04, train_time=0.555
[gpu11] 2022-06-07 23:47:15,952 (trainer:672) INFO: 5epoch:train:9026-9500batch: iter_time=1.656e-04, forward_time=0.049, loss_ctc=51.171, loss_transducer=26.369, loss=10.430, backward_time=0.045, optim_step_time=0.032, optim0_lr0=3.781e-04, train_time=0.554
[gpu11] 2022-06-07 23:47:31,484 (trainer:328) INFO: 5epoch results: [train] iter_time=2.074e-04, forward_time=0.049, loss_ctc=54.781, loss_transducer=28.933, loss=11.342, backward_time=0.045, optim_step_time=0.032, optim0_lr0=3.420e-04, train_time=0.553, time=21 minutes and 54.92 seconds, total_count=47515, gpu_max_cached_mem_GB=15.090, [valid] loss_ctc=50.745, cer_ctc=0.277, loss_transducer=23.568, cer_transducer=nan, wer_transducer=nan, loss=38.792, time=14.78 seconds, total_count=1405, gpu_max_cached_mem_GB=15.090
[gpu11] 2022-06-07 23:47:34,514 (trainer:376) INFO: The best model has been updated: valid.loss
[gpu11] 2022-06-07 23:47:34,546 (trainer:430) INFO: The model files were removed: exp/asr_IndEng188h-rnnt-600bpe/2epoch.pth
[gpu11] 2022-06-07 23:47:34,546 (trainer:262) INFO: 6/20epoch started. Estimated time to finish: 5 hours, 34 minutes and 39 seconds
[gpu11] 2022-06-07 23:48:40,523 (trainer:672) INFO: 6epoch:train:1-475batch: iter_time=7.514e-04, forward_time=0.049, loss_ctc=52.536, loss_transducer=26.709, loss=10.617, backward_time=0.045, optim_step_time=0.033, optim0_lr0=3.819e-04, train_time=0.556
[gpu11] 2022-06-07 23:49:46,675 (trainer:672) INFO: 6epoch:train:476-950batch: iter_time=1.877e-04, forward_time=0.050, loss_ctc=53.237, loss_transducer=27.190, loss=10.790, backward_time=0.045, optim_step_time=0.032, optim0_lr0=3.857e-04, train_time=0.557
[gpu11] 2022-06-07 23:50:52,153 (trainer:672) INFO: 6epoch:train:951-1425batch: iter_time=1.866e-04, forward_time=0.049, loss_ctc=53.876, loss_transducer=27.300, loss=10.866, backward_time=0.045, optim_step_time=0.032, optim0_lr0=3.895e-04, train_time=0.551
[gpu11] 2022-06-07 23:51:58,998 (trainer:672) INFO: 6epoch:train:1426-1900batch: iter_time=1.868e-04, forward_time=0.050, loss_ctc=52.346, loss_transducer=26.715, loss=10.605, backward_time=0.045, optim_step_time=0.032, optim0_lr0=3.933e-04, train_time=0.563
[gpu11] 2022-06-07 23:53:05,574 (trainer:672) INFO: 6epoch:train:1901-2375batch: iter_time=1.877e-04, forward_time=0.050, loss_ctc=50.375, loss_transducer=25.554, loss=10.167, backward_time=0.045, optim_step_time=0.032, optim0_lr0=3.971e-04, train_time=0.561
[gpu11] 2022-06-07 23:54:11,034 (trainer:672) INFO: 6epoch:train:2376-2850batch: iter_time=1.843e-04, forward_time=0.049, loss_ctc=52.154, loss_transducer=26.513, loss=10.540, backward_time=0.045, optim_step_time=0.032, optim0_lr0=4.009e-04, train_time=0.551
[gpu11] 2022-06-07 23:55:15,741 (trainer:672) INFO: 6epoch:train:2851-3325batch: iter_time=1.820e-04, forward_time=0.049, loss_ctc=51.164, loss_transducer=25.975, loss=10.331, backward_time=0.044, optim_step_time=0.032, optim0_lr0=4.047e-04, train_time=0.544
[gpu11] 2022-06-07 23:56:21,333 (trainer:672) INFO: 6epoch:train:3326-3800batch: iter_time=1.777e-04, forward_time=0.049, loss_ctc=52.540, loss_transducer=26.704, loss=10.617, backward_time=0.045, optim_step_time=0.032, optim0_lr0=4.085e-04, train_time=0.552
[gpu11] 2022-06-07 23:57:26,855 (trainer:672) INFO: 6epoch:train:3801-4275batch: iter_time=1.639e-04, forward_time=0.049, loss_ctc=50.728, loss_transducer=25.680, loss=10.225, backward_time=0.045, optim_step_time=0.032, optim0_lr0=4.123e-04, train_time=0.552
[gpu11] 2022-06-07 23:58:33,161 (trainer:672) INFO: 6epoch:train:4276-4750batch: iter_time=1.535e-04, forward_time=0.050, loss_ctc=47.909, loss_transducer=24.331, loss=9.676, backward_time=0.046, optim_step_time=0.032, optim0_lr0=4.161e-04, train_time=0.558
[gpu11] 2022-06-07 23:59:38,315 (trainer:672) INFO: 6epoch:train:4751-5225batch: iter_time=1.674e-04, forward_time=0.049, loss_ctc=50.919, loss_transducer=25.871, loss=10.287, backward_time=0.045, optim_step_time=0.032, optim0_lr0=4.199e-04, train_time=0.548
[gpu11] 2022-06-08 00:00:43,615 (trainer:672) INFO: 6epoch:train:5226-5700batch: iter_time=1.719e-04, forward_time=0.049, loss_ctc=50.896, loss_transducer=25.785, loss=10.264, backward_time=0.045, optim_step_time=0.032, optim0_lr0=4.237e-04, train_time=0.549
[gpu11] 2022-06-08 00:01:49,836 (trainer:672) INFO: 6epoch:train:5701-6175batch: iter_time=1.553e-04, forward_time=0.049, loss_ctc=49.867, loss_transducer=25.181, loss=10.035, backward_time=0.046, optim_step_time=0.032, optim0_lr0=4.275e-04, train_time=0.558
[gpu11] 2022-06-08 00:02:56,003 (trainer:672) INFO: 6epoch:train:6176-6650batch: iter_time=1.572e-04, forward_time=0.050, loss_ctc=50.162, loss_transducer=25.381, loss=10.107, backward_time=0.045, optim_step_time=0.033, optim0_lr0=4.313e-04, train_time=0.557
[gpu11] 2022-06-08 00:04:01,554 (trainer:672) INFO: 6epoch:train:6651-7125batch: iter_time=1.570e-04, forward_time=0.049, loss_ctc=50.220, loss_transducer=25.438, loss=10.126, backward_time=0.046, optim_step_time=0.033, optim0_lr0=4.351e-04, train_time=0.552
[gpu11] 2022-06-08 00:05:08,072 (trainer:672) INFO: 6epoch:train:7126-7600batch: iter_time=1.655e-04, forward_time=0.050, loss_ctc=49.213, loss_transducer=24.992, loss=9.939, backward_time=0.046, optim_step_time=0.033, optim0_lr0=4.389e-04, train_time=0.560
[gpu11] 2022-06-08 00:06:13,672 (trainer:672) INFO: 6epoch:train:7601-8075batch: iter_time=1.652e-04, forward_time=0.049, loss_ctc=49.117, loss_transducer=24.769, loss=9.876, backward_time=0.046, optim_step_time=0.033, optim0_lr0=4.427e-04, train_time=0.553
[gpu11] 2022-06-08 00:07:18,286 (trainer:672) INFO: 6epoch:train:8076-8550batch: iter_time=1.615e-04, forward_time=0.048, loss_ctc=49.363, loss_transducer=24.740, loss=9.887, backward_time=0.044, optim_step_time=0.032, optim0_lr0=4.465e-04, train_time=0.544
[gpu11] 2022-06-08 00:08:23,426 (trainer:672) INFO: 6epoch:train:8551-9025batch: iter_time=1.669e-04, forward_time=0.048, loss_ctc=47.421, loss_transducer=24.038, loss=9.566, backward_time=0.045, optim_step_time=0.033, optim0_lr0=4.503e-04, train_time=0.548
[gpu11] 2022-06-08 00:09:28,696 (trainer:672) INFO: 6epoch:train:9026-9500batch: iter_time=1.708e-04, forward_time=0.049, loss_ctc=48.425, loss_transducer=24.355, loss=9.721, backward_time=0.045, optim_step_time=0.033, optim0_lr0=4.541e-04, train_time=0.549
[gpu11] 2022-06-08 00:09:44,087 (trainer:328) INFO: 6epoch results: [train] iter_time=2.000e-04, forward_time=0.049, loss_ctc=50.599, loss_transducer=25.648, loss=10.207, backward_time=0.045, optim_step_time=0.032, optim0_lr0=4.180e-04, train_time=0.553, time=21 minutes and 54.9 seconds, total_count=57018, gpu_max_cached_mem_GB=15.090, [valid] loss_ctc=47.442, cer_ctc=0.250, loss_transducer=21.370, cer_transducer=nan, wer_transducer=nan, loss=35.603, time=14.64 seconds, total_count=1686, gpu_max_cached_mem_GB=15.090
[gpu11] 2022-06-08 00:09:47,102 (trainer:376) INFO: The best model has been updated: valid.loss
[gpu11] 2022-06-08 00:09:47,134 (trainer:430) INFO: The model files were removed: exp/asr_IndEng188h-rnnt-600bpe/3epoch.pth
[gpu11] 2022-06-08 00:09:47,135 (trainer:262) INFO: 7/20epoch started. Estimated time to finish: 5 hours, 12 minutes and 6.38 seconds
[gpu11] 2022-06-08 00:10:53,024 (trainer:672) INFO: 7epoch:train:1-475batch: iter_time=0.001, forward_time=0.049, loss_ctc=47.980, loss_transducer=23.570, loss=9.491, backward_time=0.047, optim_step_time=0.033, optim0_lr0=4.579e-04, train_time=0.555
[gpu11] 2022-06-08 00:11:59,298 (trainer:672) INFO: 7epoch:train:476-950batch: iter_time=1.606e-04, forward_time=0.050, loss_ctc=49.862, loss_transducer=24.586, loss=9.886, backward_time=0.047, optim_step_time=0.033, optim0_lr0=4.617e-04, train_time=0.558
[gpu11] 2022-06-08 00:13:05,822 (trainer:672) INFO: 7epoch:train:951-1425batch: iter_time=1.582e-04, forward_time=0.050, loss_ctc=46.630, loss_transducer=22.757, loss=9.186, backward_time=0.046, optim_step_time=0.033, optim0_lr0=4.655e-04, train_time=0.560
[gpu11] 2022-06-08 00:14:11,224 (trainer:672) INFO: 7epoch:train:1426-1900batch: iter_time=1.668e-04, forward_time=0.049, loss_ctc=46.541, loss_transducer=22.939, loss=9.225, backward_time=0.046, optim_step_time=0.033, optim0_lr0=4.693e-04, train_time=0.550
[gpu11] 2022-06-08 00:15:17,024 (trainer:672) INFO: 7epoch:train:1901-2375batch: iter_time=1.566e-04, forward_time=0.049, loss_ctc=48.609, loss_transducer=24.001, loss=9.646, backward_time=0.047, optim_step_time=0.033, optim0_lr0=4.731e-04, train_time=0.554
[gpu11] 2022-06-08 00:16:23,354 (trainer:672) INFO: 7epoch:train:2376-2850batch: iter_time=1.590e-04, forward_time=0.050, loss_ctc=47.325, loss_transducer=23.266, loss=9.366, backward_time=0.048, optim_step_time=0.033, optim0_lr0=4.769e-04, train_time=0.559
[gpu11] 2022-06-08 00:17:29,526 (trainer:672) INFO: 7epoch:train:2851-3325batch: iter_time=1.533e-04, forward_time=0.049, loss_ctc=47.512, loss_transducer=23.669, loss=9.481, backward_time=0.048, optim_step_time=0.033, optim0_lr0=4.807e-04, train_time=0.557
[gpu11] 2022-06-08 00:18:36,088 (trainer:672) INFO: 7epoch:train:3326-3800batch: iter_time=1.550e-04, forward_time=0.050, loss_ctc=48.387, loss_transducer=23.951, loss=9.617, backward_time=0.048, optim_step_time=0.033, optim0_lr0=4.845e-04, train_time=0.560
[gpu11] 2022-06-08 00:19:41,449 (trainer:672) INFO: 7epoch:train:3801-4275batch: iter_time=1.613e-04, forward_time=0.049, loss_ctc=47.436, loss_transducer=23.374, loss=9.401, backward_time=0.046, optim_step_time=0.033, optim0_lr0=4.883e-04, train_time=0.551
[gpu11] 2022-06-08 00:20:47,546 (trainer:672) INFO: 7epoch:train:4276-4750batch: iter_time=1.582e-04, forward_time=0.049, loss_ctc=46.464, loss_transducer=22.852, loss=9.198, backward_time=0.048, optim_step_time=0.033, optim0_lr0=4.921e-04, train_time=0.556
[gpu11] 2022-06-08 00:21:52,755 (trainer:672) INFO: 7epoch:train:4751-5225batch: iter_time=1.622e-04, forward_time=0.048, loss_ctc=48.196, loss_transducer=23.772, loss=9.558, backward_time=0.045, optim_step_time=0.032, optim0_lr0=4.959e-04, train_time=0.549
[gpu11] 2022-06-08 00:22:58,257 (trainer:672) INFO: 7epoch:train:5226-5700batch: iter_time=1.642e-04, forward_time=0.049, loss_ctc=47.981, loss_transducer=23.508, loss=9.476, backward_time=0.045, optim_step_time=0.032, optim0_lr0=4.997e-04, train_time=0.551
[gpu11] 2022-06-08 00:24:03,474 (trainer:672) INFO: 7epoch:train:5701-6175batch: iter_time=1.645e-04, forward_time=0.049, loss_ctc=48.401, loss_transducer=23.889, loss=9.602, backward_time=0.045, optim_step_time=0.032, optim0_lr0=5.035e-04, train_time=0.549
[gpu11] 2022-06-08 00:25:09,423 (trainer:672) INFO: 7epoch:train:6176-6650batch: iter_time=1.644e-04, forward_time=0.049, loss_ctc=45.885, loss_transducer=22.804, loss=9.142, backward_time=0.045, optim_step_time=0.032, optim0_lr0=5.073e-04, train_time=0.555
[gpu11] 2022-06-08 00:26:14,914 (trainer:672) INFO: 7epoch:train:6651-7125batch: iter_time=1.576e-04, forward_time=0.049, loss_ctc=47.143, loss_transducer=23.444, loss=9.397, backward_time=0.046, optim_step_time=0.033, optim0_lr0=5.111e-04, train_time=0.551
[gpu11] 2022-06-08 00:27:21,744 (trainer:672) INFO: 7epoch:train:7126-7600batch: iter_time=1.545e-04, forward_time=0.050, loss_ctc=46.064, loss_transducer=22.514, loss=9.083, backward_time=0.047, optim_step_time=0.033, optim0_lr0=5.149e-04, train_time=0.562
[gpu11] 2022-06-08 00:28:27,324 (trainer:672) INFO: 7epoch:train:7601-8075batch: iter_time=1.525e-04, forward_time=0.049, loss_ctc=47.143, loss_transducer=23.158, loss=9.325, backward_time=0.046, optim_step_time=0.033, optim0_lr0=5.187e-04, train_time=0.552
[gpu11] 2022-06-08 00:29:33,482 (trainer:672) INFO: 7epoch:train:8076-8550batch: iter_time=1.552e-04, forward_time=0.049, loss_ctc=49.521, loss_transducer=24.512, loss=9.842, backward_time=0.046, optim_step_time=0.033, optim0_lr0=5.225e-04, train_time=0.557
[gpu11] 2022-06-08 00:30:40,198 (trainer:672) INFO: 7epoch:train:8551-9025batch: iter_time=1.509e-04, forward_time=0.050, loss_ctc=49.639, loss_transducer=24.675, loss=9.892, backward_time=0.046, optim_step_time=0.033, optim0_lr0=5.263e-04, train_time=0.561
[gpu11] 2022-06-08 00:31:46,693 (trainer:672) INFO: 7epoch:train:9026-9500batch: iter_time=1.547e-04, forward_time=0.050, loss_ctc=48.430, loss_transducer=23.737, loss=9.567, backward_time=0.046, optim_step_time=0.033, optim0_lr0=5.301e-04, train_time=0.559
[gpu11] 2022-06-08 00:32:02,027 (trainer:328) INFO: 7epoch results: [train] iter_time=2.077e-04, forward_time=0.049, loss_ctc=47.740, loss_transducer=23.540, loss=9.465, backward_time=0.046, optim_step_time=0.033, optim0_lr0=4.940e-04, train_time=0.555, time=22 minutes and 0.28 seconds, total_count=66521, gpu_max_cached_mem_GB=15.090, [valid] loss_ctc=43.882, cer_ctc=0.229, loss_transducer=19.432, cer_transducer=nan, wer_transducer=nan, loss=32.596, time=14.61 seconds, total_count=1967, gpu_max_cached_mem_GB=15.090
[gpu11] 2022-06-08 00:32:04,944 (trainer:376) INFO: The best model has been updated: valid.loss
[gpu11] 2022-06-08 00:32:04,975 (trainer:430) INFO: The model files were removed: exp/asr_IndEng188h-rnnt-600bpe/4epoch.pth
[gpu11] 2022-06-08 00:32:04,975 (trainer:262) INFO: 8/20epoch started. Estimated time to finish: 4 hours, 49 minutes and 49.23 seconds
[gpu11] 2022-06-08 00:33:11,168 (trainer:672) INFO: 8epoch:train:1-475batch: iter_time=0.001, forward_time=0.049, loss_ctc=46.820, loss_transducer=22.509, loss=9.139, backward_time=0.045, optim_step_time=0.033, optim0_lr0=5.339e-04, train_time=0.558
[gpu11] 2022-06-08 00:34:16,997 (trainer:672) INFO: 8epoch:train:476-950batch: iter_time=1.549e-04, forward_time=0.049, loss_ctc=47.614, loss_transducer=22.704, loss=9.247, backward_time=0.046, optim_step_time=0.033, optim0_lr0=5.377e-04, train_time=0.554
[gpu11] 2022-06-08 00:35:23,050 (trainer:672) INFO: 8epoch:train:951-1425batch: iter_time=1.555e-04, forward_time=0.050, loss_ctc=44.633, loss_transducer=21.403, loss=8.698, backward_time=0.047, optim_step_time=0.033, optim0_lr0=5.415e-04, train_time=0.556
[gpu11] 2022-06-08 00:36:29,009 (trainer:672) INFO: 8epoch:train:1426-1900batch: iter_time=1.526e-04, forward_time=0.049, loss_ctc=46.219, loss_transducer=22.104, loss=8.992, backward_time=0.046, optim_step_time=0.033, optim0_lr0=5.453e-04, train_time=0.555
[gpu11] 2022-06-08 00:37:35,186 (trainer:672) INFO: 8epoch:train:1901-2375batch: iter_time=1.524e-04, forward_time=0.050, loss_ctc=45.836, loss_transducer=22.025, loss=8.944, backward_time=0.046, optim_step_time=0.033, optim0_lr0=5.491e-04, train_time=0.557
[gpu11] 2022-06-08 00:38:41,005 (trainer:672) INFO: 8epoch:train:2376-2850batch: iter_time=1.534e-04, forward_time=0.050, loss_ctc=46.296, loss_transducer=22.288, loss=9.044, backward_time=0.046, optim_step_time=0.033, optim0_lr0=5.529e-04, train_time=0.555
[gpu11] 2022-06-08 00:39:47,859 (trainer:672) INFO: 8epoch:train:2851-3325batch: iter_time=1.536e-04, forward_time=0.050, loss_ctc=46.552, loss_transducer=22.606, loss=9.143, backward_time=0.046, optim_step_time=0.033, optim0_lr0=5.567e-04, train_time=0.562
[gpu11] 2022-06-08 00:40:53,414 (trainer:672) INFO: 8epoch:train:3326-3800batch: iter_time=1.580e-04, forward_time=0.049, loss_ctc=44.854, loss_transducer=21.725, loss=8.795, backward_time=0.046, optim_step_time=0.033, optim0_lr0=5.605e-04, train_time=0.552
[gpu11] 2022-06-08 00:41:59,834 (trainer:672) INFO: 8epoch:train:3801-4275batch: iter_time=1.617e-04, forward_time=0.050, loss_ctc=46.498, loss_transducer=22.342, loss=9.073, backward_time=0.046, optim_step_time=0.033, optim0_lr0=5.643e-04, train_time=0.560
[gpu11] 2022-06-08 00:43:04,833 (trainer:672) INFO: 8epoch:train:4276-4750batch: iter_time=1.682e-04, forward_time=0.049, loss_ctc=45.051, loss_transducer=21.689, loss=8.801, backward_time=0.045, optim_step_time=0.033, optim0_lr0=5.681e-04, train_time=0.547
[gpu11] 2022-06-08 00:44:10,443 (trainer:672) INFO: 8epoch:train:4751-5225batch: iter_time=1.672e-04, forward_time=0.049, loss_ctc=45.675, loss_transducer=22.270, loss=8.993, backward_time=0.045, optim_step_time=0.033, optim0_lr0=5.719e-04, train_time=0.552
[gpu11] 2022-06-08 00:45:16,624 (trainer:672) INFO: 8epoch:train:5226-5700batch: iter_time=1.548e-04, forward_time=0.050, loss_ctc=46.018, loss_transducer=22.285, loss=9.023, backward_time=0.046, optim_step_time=0.033, optim0_lr0=5.757e-04, train_time=0.557
[gpu11] 2022-06-08 00:46:22,724 (trainer:672) INFO: 8epoch:train:5701-6175batch: iter_time=1.664e-04, forward_time=0.050, loss_ctc=44.841, loss_transducer=21.844, loss=8.824, backward_time=0.045, optim_step_time=0.032, optim0_lr0=5.795e-04, train_time=0.556
[gpu11] 2022-06-08 00:47:28,869 (trainer:672) INFO: 8epoch:train:6176-6650batch: iter_time=1.632e-04, forward_time=0.049, loss_ctc=44.539, loss_transducer=21.476, loss=8.709, backward_time=0.045, optim_step_time=0.032, optim0_lr0=5.833e-04, train_time=0.557
[gpu11] 2022-06-08 00:48:34,620 (trainer:672) INFO: 8epoch:train:6651-7125batch: iter_time=1.632e-04, forward_time=0.049, loss_ctc=46.393, loss_transducer=22.450, loss=9.092, backward_time=0.045, optim_step_time=0.033, optim0_lr0=5.871e-04, train_time=0.553
[gpu11] 2022-06-08 00:49:39,928 (trainer:672) INFO: 8epoch:train:7126-7600batch: iter_time=1.569e-04, forward_time=0.049, loss_ctc=43.266, loss_transducer=20.762, loss=8.435, backward_time=0.045, optim_step_time=0.033, optim0_lr0=5.909e-04, train_time=0.549
[gpu11] 2022-06-08 00:50:44,812 (trainer:672) INFO: 8epoch:train:7601-8075batch: iter_time=1.671e-04, forward_time=0.049, loss_ctc=43.510, loss_transducer=21.026, loss=8.520, backward_time=0.045, optim_step_time=0.032, optim0_lr0=5.947e-04, train_time=0.547
[gpu11] 2022-06-08 00:51:51,142 (trainer:672) INFO: 8epoch:train:8076-8550batch: iter_time=1.652e-04, forward_time=0.049, loss_ctc=46.858, loss_transducer=22.876, loss=9.233, backward_time=0.045, optim_step_time=0.032, optim0_lr0=5.985e-04, train_time=0.558
[gpu11] 2022-06-08 00:52:57,187 (trainer:672) INFO: 8epoch:train:8551-9025batch: iter_time=1.619e-04, forward_time=0.049, loss_ctc=42.431, loss_transducer=20.580, loss=8.327, backward_time=0.045, optim_step_time=0.032, optim0_lr0=6.023e-04, train_time=0.556
[gpu11] 2022-06-08 00:54:02,485 (trainer:672) INFO: 8epoch:train:9026-9500batch: iter_time=1.611e-04, forward_time=0.049, loss_ctc=43.047, loss_transducer=20.885, loss=8.450, backward_time=0.045, optim_step_time=0.032, optim0_lr0=6.061e-04, train_time=0.549
[gpu11] 2022-06-08 00:54:17,888 (trainer:328) INFO: 8epoch results: [train] iter_time=2.109e-04, forward_time=0.049, loss_ctc=45.338, loss_transducer=21.887, loss=8.872, backward_time=0.046, optim_step_time=0.033, optim0_lr0=5.700e-04, train_time=0.555, time=21 minutes and 58.29 seconds, total_count=76024, gpu_max_cached_mem_GB=15.090, [valid] loss_ctc=41.995, cer_ctc=0.217, loss_transducer=18.417, cer_transducer=nan, wer_transducer=nan, loss=31.015, time=14.62 seconds, total_count=2248, gpu_max_cached_mem_GB=15.090
[gpu11] 2022-06-08 00:54:20,851 (trainer:376) INFO: The best model has been updated: valid.loss
[gpu11] 2022-06-08 00:54:20,882 (trainer:430) INFO: The model files were removed: exp/asr_IndEng188h-rnnt-600bpe/5epoch.pth
[gpu11] 2022-06-08 00:54:20,882 (trainer:262) INFO: 9/20epoch started. Estimated time to finish: 4 hours, 27 minutes and 29.01 seconds
[gpu11] 2022-06-08 00:55:26,207 (trainer:672) INFO: 9epoch:train:1-475batch: iter_time=0.001, forward_time=0.048, loss_ctc=41.120, loss_transducer=19.185, loss=7.880, backward_time=0.044, optim_step_time=0.032, optim0_lr0=6.099e-04, train_time=0.550
[gpu11] 2022-06-08 00:56:32,999 (trainer:672) INFO: 9epoch:train:476-950batch: iter_time=1.581e-04, forward_time=0.050, loss_ctc=46.398, loss_transducer=21.837, loss=8.939, backward_time=0.047, optim_step_time=0.033, optim0_lr0=6.137e-04, train_time=0.562
[gpu11] 2022-06-08 00:57:38,559 (trainer:672) INFO: 9epoch:train:951-1425batch: iter_time=1.615e-04, forward_time=0.049, loss_ctc=41.339, loss_transducer=19.282, loss=7.921, backward_time=0.045, optim_step_time=0.032, optim0_lr0=6.175e-04, train_time=0.552
[gpu11] 2022-06-08 00:58:44,291 (trainer:672) INFO: 9epoch:train:1426-1900batch: iter_time=1.683e-04, forward_time=0.049, loss_ctc=45.151, loss_transducer=21.347, loss=8.723, backward_time=0.045, optim_step_time=0.032, optim0_lr0=6.213e-04, train_time=0.553
[gpu11] 2022-06-08 00:59:49,001 (trainer:672) INFO: 9epoch:train:1901-2375batch: iter_time=1.634e-04, forward_time=0.048, loss_ctc=42.904, loss_transducer=20.259, loss=8.283, backward_time=0.044, optim_step_time=0.032, optim0_lr0=6.251e-04, train_time=0.546
[gpu11] 2022-06-08 01:00:53,930 (trainer:672) INFO: 9epoch:train:2376-2850batch: iter_time=1.572e-04, forward_time=0.049, loss_ctc=44.510, loss_transducer=21.004, loss=8.589, backward_time=0.045, optim_step_time=0.032, optim0_lr0=6.289e-04, train_time=0.546
[gpu11] 2022-06-08 01:01:59,058 (trainer:672) INFO: 9epoch:train:2851-3325batch: iter_time=1.517e-04, forward_time=0.049, loss_ctc=42.382, loss_transducer=20.208, loss=8.231, backward_time=0.045, optim_step_time=0.032, optim0_lr0=6.327e-04, train_time=0.548
[gpu11] 2022-06-08 01:03:04,644 (trainer:672) INFO: 9epoch:train:3326-3800batch: iter_time=1.670e-04, forward_time=0.049, loss_ctc=42.679, loss_transducer=20.201, loss=8.251, backward_time=0.045, optim_step_time=0.032, optim0_lr0=6.365e-04, train_time=0.552
[gpu11] 2022-06-08 01:04:09,793 (trainer:672) INFO: 9epoch:train:3801-4275batch: iter_time=1.660e-04, forward_time=0.049, loss_ctc=41.475, loss_transducer=19.893, loss=8.084, backward_time=0.045, optim_step_time=0.032, optim0_lr0=6.403e-04, train_time=0.549
[gpu11] 2022-06-08 01:05:15,989 (trainer:672) INFO: 9epoch:train:4276-4750batch: iter_time=1.562e-04, forward_time=0.049, loss_ctc=44.388, loss_transducer=21.386, loss=8.676, backward_time=0.046, optim_step_time=0.032, optim0_lr0=6.441e-04, train_time=0.557
[gpu11] 2022-06-08 01:06:21,389 (trainer:672) INFO: 9epoch:train:4751-5225batch: iter_time=1.658e-04, forward_time=0.049, loss_ctc=46.664, loss_transducer=22.289, loss=9.072, backward_time=0.045, optim_step_time=0.032, optim0_lr0=6.479e-04, train_time=0.551
[gpu11] 2022-06-08 01:07:26,369 (trainer:672) INFO: 9epoch:train:5226-5700batch: iter_time=1.641e-04, forward_time=0.048, loss_ctc=43.862, loss_transducer=21.001, loss=8.540, backward_time=0.044, optim_step_time=0.032, optim0_lr0=6.517e-04, train_time=0.547
[gpu11] 2022-06-08 01:08:32,390 (trainer:672) INFO: 9epoch:train:5701-6175batch: iter_time=1.666e-04, forward_time=0.049, loss_ctc=40.636, loss_transducer=19.337, loss=7.882, backward_time=0.045, optim_step_time=0.032, optim0_lr0=6.555e-04, train_time=0.556
[gpu11] 2022-06-08 01:09:38,384 (trainer:672) INFO: 9epoch:train:6176-6650batch: iter_time=1.587e-04, forward_time=0.049, loss_ctc=43.565, loss_transducer=20.850, loss=8.480, backward_time=0.045, optim_step_time=0.033, optim0_lr0=6.593e-04, train_time=0.555
[gpu11] 2022-06-08 01:10:44,826 (trainer:672) INFO: 9epoch:train:6651-7125batch: iter_time=1.541e-04, forward_time=0.050, loss_ctc=43.823, loss_transducer=20.981, loss=8.532, backward_time=0.045, optim_step_time=0.033, optim0_lr0=6.631e-04, train_time=0.560
[gpu11] 2022-06-08 01:11:50,161 (trainer:672) INFO: 9epoch:train:7126-7600batch: iter_time=1.612e-04, forward_time=0.048, loss_ctc=44.129, loss_transducer=20.939, loss=8.544, backward_time=0.045, optim_step_time=0.033, optim0_lr0=6.669e-04, train_time=0.549
[gpu11] 2022-06-08 01:12:55,658 (trainer:672) INFO: 9epoch:train:7601-8075batch: iter_time=1.579e-04, forward_time=0.049, loss_ctc=44.677, loss_transducer=21.523, loss=8.731, backward_time=0.045, optim_step_time=0.033, optim0_lr0=6.707e-04, train_time=0.552
[gpu11] 2022-06-08 01:14:00,630 (trainer:672) INFO: 9epoch:train:8076-8550batch: iter_time=1.511e-04, forward_time=0.049, loss_ctc=42.786, loss_transducer=20.430, loss=8.316, backward_time=0.045, optim_step_time=0.033, optim0_lr0=6.745e-04, train_time=0.547
[gpu11] 2022-06-08 01:15:06,504 (trainer:672) INFO: 9epoch:train:8551-9025batch: iter_time=1.571e-04, forward_time=0.049, loss_ctc=44.243, loss_transducer=21.259, loss=8.633, backward_time=0.046, optim_step_time=0.033, optim0_lr0=6.783e-04, train_time=0.554
[gpu11] 2022-06-08 01:16:12,422 (trainer:672) INFO: 9epoch:train:9026-9500batch: iter_time=1.554e-04, forward_time=0.049, loss_ctc=45.149, loss_transducer=21.624, loss=8.792, backward_time=0.046, optim_step_time=0.033, optim0_lr0=6.821e-04, train_time=0.555
[gpu11] 2022-06-08 01:16:27,739 (trainer:328) INFO: 9epoch results: [train] iter_time=2.096e-04, forward_time=0.049, loss_ctc=43.565, loss_transducer=20.728, loss=8.449, backward_time=0.045, optim_step_time=0.032, optim0_lr0=6.460e-04, train_time=0.552, time=21 minutes and 52.26 seconds, total_count=85527, gpu_max_cached_mem_GB=15.090, [valid] loss_ctc=40.490, cer_ctc=0.208, loss_transducer=17.464, cer_transducer=nan, wer_transducer=nan, loss=29.610, time=14.59 seconds, total_count=2529, gpu_max_cached_mem_GB=15.090
[gpu11] 2022-06-08 01:16:30,579 (trainer:376) INFO: The best model has been updated: valid.loss
[gpu11] 2022-06-08 01:16:30,612 (trainer:430) INFO: The model files were removed: exp/asr_IndEng188h-rnnt-600bpe/6epoch.pth
[gpu11] 2022-06-08 01:16:30,613 (trainer:262) INFO: 10/20epoch started. Estimated time to finish: 4 hours, 5 minutes and 2.19 seconds
[gpu11] 2022-06-08 01:17:37,078 (trainer:672) INFO: 10epoch:train:1-475batch: iter_time=0.001, forward_time=0.049, loss_ctc=45.061, loss_transducer=20.687, loss=8.551, backward_time=0.045, optim_step_time=0.033, optim0_lr0=6.859e-04, train_time=0.560
[gpu11] 2022-06-08 01:18:42,773 (trainer:672) INFO: 10epoch:train:476-950batch: iter_time=1.718e-04, forward_time=0.049, loss_ctc=42.240, loss_transducer=19.779, loss=8.113, backward_time=0.045, optim_step_time=0.033, optim0_lr0=6.897e-04, train_time=0.553
[gpu11] 2022-06-08 01:19:48,597 (trainer:672) INFO: 10epoch:train:951-1425batch: iter_time=1.720e-04, forward_time=0.049, loss_ctc=41.210, loss_transducer=19.129, loss=7.873, backward_time=0.045, optim_step_time=0.033, optim0_lr0=6.935e-04, train_time=0.554
[gpu11] 2022-06-08 01:20:54,216 (trainer:672) INFO: 10epoch:train:1426-1900batch: iter_time=1.685e-04, forward_time=0.049, loss_ctc=42.273, loss_transducer=19.512, loss=8.048, backward_time=0.045, optim_step_time=0.033, optim0_lr0=6.973e-04, train_time=0.552
[gpu11] 2022-06-08 01:21:59,922 (trainer:672) INFO: 10epoch:train:1901-2375batch: iter_time=1.579e-04, forward_time=0.049, loss_ctc=42.945, loss_transducer=19.978, loss=8.216, backward_time=0.045, optim_step_time=0.033, optim0_lr0=7.011e-04, train_time=0.554
[gpu11] 2022-06-08 01:23:05,049 (trainer:672) INFO: 10epoch:train:2376-2850batch: iter_time=1.681e-04, forward_time=0.049, loss_ctc=40.119, loss_transducer=18.813, loss=7.712, backward_time=0.045, optim_step_time=0.033, optim0_lr0=7.049e-04, train_time=0.548
[gpu11] 2022-06-08 01:24:09,877 (trainer:672) INFO: 10epoch:train:2851-3325batch: iter_time=1.642e-04, forward_time=0.048, loss_ctc=40.651, loss_transducer=19.033, loss=7.807, backward_time=0.045, optim_step_time=0.033, optim0_lr0=7.087e-04, train_time=0.545
[gpu11] 2022-06-08 01:25:16,395 (trainer:672) INFO: 10epoch:train:3326-3800batch: iter_time=1.570e-04, forward_time=0.050, loss_ctc=42.062, loss_transducer=19.936, loss=8.139, backward_time=0.045, optim_step_time=0.032, optim0_lr0=7.125e-04, train_time=0.560
[gpu11] 2022-06-08 01:26:22,498 (trainer:672) INFO: 10epoch:train:3801-4275batch: iter_time=1.472e-04, forward_time=0.050, loss_ctc=40.672, loss_transducer=19.155, loss=7.839, backward_time=0.046, optim_step_time=0.033, optim0_lr0=7.163e-04, train_time=0.557
[gpu11] 2022-06-08 01:27:28,426 (trainer:672) INFO: 10epoch:train:4276-4750batch: iter_time=1.597e-04, forward_time=0.049, loss_ctc=41.678, loss_transducer=19.812, loss=8.079, backward_time=0.045, optim_step_time=0.033, optim0_lr0=7.201e-04, train_time=0.555
[gpu11] 2022-06-08 01:28:33,922 (trainer:672) INFO: 10epoch:train:4751-5225batch: iter_time=1.666e-04, forward_time=0.049, loss_ctc=40.539, loss_transducer=19.091, loss=7.813, backward_time=0.045, optim_step_time=0.033, optim0_lr0=7.239e-04, train_time=0.551
[gpu11] 2022-06-08 01:29:38,881 (trainer:672) INFO: 10epoch:train:5226-5700batch: iter_time=1.662e-04, forward_time=0.048, loss_ctc=42.430, loss_transducer=20.148, loss=8.219, backward_time=0.045, optim_step_time=0.032, optim0_lr0=7.277e-04, train_time=0.546
[gpu11] 2022-06-08 01:30:44,172 (trainer:672) INFO: 10epoch:train:5701-6175batch: iter_time=1.675e-04, forward_time=0.049, loss_ctc=42.147, loss_transducer=20.046, loss=8.173, backward_time=0.044, optim_step_time=0.033, optim0_lr0=7.315e-04, train_time=0.550
[gpu11] 2022-06-08 01:31:48,524 (trainer:672) INFO: 10epoch:train:6176-6650batch: iter_time=1.611e-04, forward_time=0.048, loss_ctc=40.784, loss_transducer=19.421, loss=7.914, backward_time=0.045, optim_step_time=0.033, optim0_lr0=7.353e-04, train_time=0.541
[gpu11] 2022-06-08 01:32:54,980 (trainer:672) INFO: 10epoch:train:6651-7125batch: iter_time=1.516e-04, forward_time=0.049, loss_ctc=44.783, loss_transducer=21.275, loss=8.677, backward_time=0.047, optim_step_time=0.033, optim0_lr0=7.391e-04, train_time=0.559
[gpu11] 2022-06-08 01:34:01,098 (trainer:672) INFO: 10epoch:train:7126-7600batch: iter_time=1.505e-04, forward_time=0.049, loss_ctc=40.952, loss_transducer=19.339, loss=7.906, backward_time=0.049, optim_step_time=0.033, optim0_lr0=7.429e-04, train_time=0.556
[gpu11] 2022-06-08 01:35:08,593 (trainer:672) INFO: 10epoch:train:7601-8075batch: iter_time=1.449e-04, forward_time=0.050, loss_ctc=42.055, loss_transducer=20.018, loss=8.159, backward_time=0.050, optim_step_time=0.033, optim0_lr0=7.467e-04, train_time=0.568
[gpu11] 2022-06-08 01:36:15,812 (trainer:672) INFO: 10epoch:train:8076-8550batch: iter_time=1.490e-04, forward_time=0.050, loss_ctc=44.092, loss_transducer=20.903, loss=8.533, backward_time=0.049, optim_step_time=0.033, optim0_lr0=7.505e-04, train_time=0.566
[gpu11] 2022-06-08 01:37:22,035 (trainer:672) INFO: 10epoch:train:8551-9025batch: iter_time=1.423e-04, forward_time=0.050, loss_ctc=40.470, loss_transducer=19.298, loss=7.860, backward_time=0.048, optim_step_time=0.033, optim0_lr0=7.543e-04, train_time=0.557
[gpu11] 2022-06-08 01:38:28,612 (trainer:672) INFO: 10epoch:train:9026-9500batch: iter_time=1.563e-04, forward_time=0.050, loss_ctc=43.402, loss_transducer=20.639, loss=8.415, backward_time=0.049, optim_step_time=0.033, optim0_lr0=7.581e-04, train_time=0.560
[gpu11] 2022-06-08 01:38:43,966 (trainer:328) INFO: 10epoch results: [train] iter_time=2.093e-04, forward_time=0.049, loss_ctc=42.026, loss_transducer=19.801, loss=8.102, backward_time=0.046, optim_step_time=0.033, optim0_lr0=7.220e-04, train_time=0.555, time=21 minutes and 58.79 seconds, total_count=95030, gpu_max_cached_mem_GB=15.090, [valid] loss_ctc=38.401, cer_ctc=0.203, loss_transducer=16.265, cer_transducer=nan, wer_transducer=nan, loss=27.786, time=14.56 seconds, total_count=2810, gpu_max_cached_mem_GB=15.090
[gpu11] 2022-06-08 01:38:46,966 (trainer:376) INFO: The best model has been updated: valid.loss
[gpu11] 2022-06-08 01:38:46,995 (trainer:430) INFO: The model files were removed: exp/asr_IndEng188h-rnnt-600bpe/7epoch.pth
[gpu11] 2022-06-08 01:38:46,995 (trainer:262) INFO: 11/20epoch started. Estimated time to finish: 3 hours, 42 minutes and 45.45 seconds
[gpu11] 2022-06-08 01:39:53,870 (trainer:672) INFO: 11epoch:train:1-475batch: iter_time=0.001, forward_time=0.050, loss_ctc=40.114, loss_transducer=18.117, loss=7.538, backward_time=0.047, optim_step_time=0.033, optim0_lr0=7.619e-04, train_time=0.563
[gpu11] 2022-06-08 01:41:00,859 (trainer:672) INFO: 11epoch:train:476-950batch: iter_time=1.718e-04, forward_time=0.050, loss_ctc=42.169, loss_transducer=19.184, loss=7.959, backward_time=0.048, optim_step_time=0.033, optim0_lr0=7.657e-04, train_time=0.564
[gpu11] 2022-06-08 01:42:07,378 (trainer:672) INFO: 11epoch:train:951-1425batch: iter_time=1.699e-04, forward_time=0.050, loss_ctc=40.823, loss_transducer=18.568, loss=7.704, backward_time=0.048, optim_step_time=0.033, optim0_lr0=7.695e-04, train_time=0.560
[gpu11] 2022-06-08 01:43:13,672 (trainer:672) INFO: 11epoch:train:1426-1900batch: iter_time=1.701e-04, forward_time=0.050, loss_ctc=40.905, loss_transducer=18.969, loss=7.810, backward_time=0.048, optim_step_time=0.033, optim0_lr0=7.733e-04, train_time=0.558
[gpu11] 2022-06-08 01:44:19,977 (trainer:672) INFO: 11epoch:train:1901-2375batch: iter_time=1.787e-04, forward_time=0.050, loss_ctc=40.762, loss_transducer=18.949, loss=7.794, backward_time=0.048, optim_step_time=0.033, optim0_lr0=7.771e-04, train_time=0.558
[gpu11] 2022-06-08 01:45:26,091 (trainer:672) INFO: 11epoch:train:2376-2850batch: iter_time=1.975e-04, forward_time=0.050, loss_ctc=39.265, loss_transducer=18.137, loss=7.479, backward_time=0.047, optim_step_time=0.033, optim0_lr0=7.809e-04, train_time=0.557
[gpu11] 2022-06-08 01:46:33,191 (trainer:672) INFO: 11epoch:train:2851-3325batch: iter_time=2.030e-04, forward_time=0.050, loss_ctc=42.099, loss_transducer=19.637, loss=8.067, backward_time=0.048, optim_step_time=0.033, optim0_lr0=7.847e-04, train_time=0.565
[gpu11] 2022-06-08 01:47:39,868 (trainer:672) INFO: 11epoch:train:3326-3800batch: iter_time=2.039e-04, forward_time=0.050, loss_ctc=41.329, loss_transducer=19.537, loss=7.984, backward_time=0.048, optim_step_time=0.033, optim0_lr0=7.885e-04, train_time=0.561
[gpu11] 2022-06-08 01:48:46,511 (trainer:672) INFO: 11epoch:train:3801-4275batch: iter_time=1.728e-04, forward_time=0.050, loss_ctc=41.693, loss_transducer=19.530, loss=8.010, backward_time=0.047, optim_step_time=0.033, optim0_lr0=7.923e-04, train_time=0.561
[gpu11] 2022-06-08 01:49:52,852 (trainer:672) INFO: 11epoch:train:4276-4750batch: iter_time=1.710e-04, forward_time=0.050, loss_ctc=39.483, loss_transducer=18.712, loss=7.639, backward_time=0.047, optim_step_time=0.033, optim0_lr0=7.961e-04, train_time=0.559
[gpu11] 2022-06-08 01:50:59,794 (trainer:672) INFO: 11epoch:train:4751-5225batch: iter_time=1.725e-04, forward_time=0.051, loss_ctc=40.343, loss_transducer=19.194, loss=7.824, backward_time=0.048, optim_step_time=0.033, optim0_lr0=7.993e-04, train_time=0.563
[gpu11] 2022-06-08 01:52:06,609 (trainer:672) INFO: 11epoch:train:5226-5700batch: iter_time=1.750e-04, forward_time=0.050, loss_ctc=40.155, loss_transducer=18.906, loss=7.738, backward_time=0.049, optim_step_time=0.033, optim0_lr0=7.981e-04, train_time=0.562
[gpu11] 2022-06-08 01:53:13,258 (trainer:672) INFO: 11epoch:train:5701-6175batch: iter_time=1.787e-04, forward_time=0.050, loss_ctc=38.003, loss_transducer=18.050, loss=7.363, backward_time=0.047, optim_step_time=0.033, optim0_lr0=7.963e-04, train_time=0.562
[gpu11] 2022-06-08 01:54:19,806 (trainer:672) INFO: 11epoch:train:6176-6650batch: iter_time=1.989e-04, forward_time=0.050, loss_ctc=40.325, loss_transducer=18.843, loss=7.735, backward_time=0.046, optim_step_time=0.033, optim0_lr0=7.944e-04, train_time=0.560
[gpu11] 2022-06-08 01:55:25,766 (trainer:672) INFO: 11epoch:train:6651-7125batch: iter_time=1.808e-04, forward_time=0.050, loss_ctc=39.848, loss_transducer=18.827, loss=7.695, backward_time=0.046, optim_step_time=0.033, optim0_lr0=7.925e-04, train_time=0.555
[gpu11] 2022-06-08 01:56:31,881 (trainer:672) INFO: 11epoch:train:7126-7600batch: iter_time=1.904e-04, forward_time=0.050, loss_ctc=42.119, loss_transducer=19.918, loss=8.139, backward_time=0.045, optim_step_time=0.033, optim0_lr0=7.907e-04, train_time=0.556
[gpu11] 2022-06-08 01:57:37,622 (trainer:672) INFO: 11epoch:train:7601-8075batch: iter_time=1.927e-04, forward_time=0.050, loss_ctc=40.432, loss_transducer=19.138, loss=7.817, backward_time=0.045, optim_step_time=0.033, optim0_lr0=7.889e-04, train_time=0.554
[gpu11] 2022-06-08 01:58:43,306 (trainer:672) INFO: 11epoch:train:8076-8550batch: iter_time=1.770e-04, forward_time=0.050, loss_ctc=39.926, loss_transducer=19.019, loss=7.749, backward_time=0.045, optim_step_time=0.033, optim0_lr0=7.871e-04, train_time=0.553
[gpu11] 2022-06-08 01:59:48,899 (trainer:672) INFO: 11epoch:train:8551-9025batch: iter_time=1.682e-04, forward_time=0.050, loss_ctc=39.498, loss_transducer=18.790, loss=7.660, backward_time=0.046, optim_step_time=0.033, optim0_lr0=7.853e-04, train_time=0.552
[gpu11] 2022-06-08 02:00:55,319 (trainer:672) INFO: 11epoch:train:9026-9500batch: iter_time=1.688e-04, forward_time=0.050, loss_ctc=40.865, loss_transducer=19.436, loss=7.924, backward_time=0.046, optim_step_time=0.033, optim0_lr0=7.835e-04, train_time=0.559
[gpu11] 2022-06-08 02:01:10,631 (trainer:328) INFO: 11epoch results: [train] iter_time=2.278e-04, forward_time=0.050, loss_ctc=40.508, loss_transducer=18.973, loss=7.781, backward_time=0.047, optim_step_time=0.033, optim0_lr0=7.853e-04, train_time=0.559, time=22 minutes and 9.04 seconds, total_count=104533, gpu_max_cached_mem_GB=15.090, [valid] loss_ctc=36.588, cer_ctc=0.188, loss_transducer=15.605, cer_transducer=nan, wer_transducer=nan, loss=26.582, time=14.59 seconds, total_count=3091, gpu_max_cached_mem_GB=15.090
[gpu11] 2022-06-08 02:01:13,534 (trainer:376) INFO: The best model has been updated: valid.loss
[gpu11] 2022-06-08 02:01:13,566 (trainer:430) INFO: The model files were removed: exp/asr_IndEng188h-rnnt-600bpe/8epoch.pth
[gpu11] 2022-06-08 02:01:13,566 (trainer:262) INFO: 12/20epoch started. Estimated time to finish: 3 hours, 20 minutes and 37.11 seconds
[gpu11] 2022-06-08 02:02:19,600 (trainer:672) INFO: 12epoch:train:1-475batch: iter_time=0.001, forward_time=0.050, loss_ctc=37.088, loss_transducer=16.681, loss=6.952, backward_time=0.046, optim_step_time=0.033, optim0_lr0=7.817e-04, train_time=0.556
[gpu11] 2022-06-08 02:03:25,722 (trainer:672) INFO: 12epoch:train:476-950batch: iter_time=1.735e-04, forward_time=0.050, loss_ctc=35.650, loss_transducer=16.259, loss=6.739, backward_time=0.046, optim_step_time=0.033, optim0_lr0=7.799e-04, train_time=0.556
[gpu11] 2022-06-08 02:04:30,832 (trainer:672) INFO: 12epoch:train:951-1425batch: iter_time=1.884e-04, forward_time=0.049, loss_ctc=37.959, loss_transducer=17.390, loss=7.194, backward_time=0.045, optim_step_time=0.033, optim0_lr0=7.782e-04, train_time=0.548
[gpu11] 2022-06-08 02:05:37,464 (trainer:672) INFO: 12epoch:train:1426-1900batch: iter_time=1.828e-04, forward_time=0.050, loss_ctc=37.861, loss_transducer=17.227, loss=7.146, backward_time=0.045, optim_step_time=0.033, optim0_lr0=7.764e-04, train_time=0.561
[gpu11] 2022-06-08 02:06:43,572 (trainer:672) INFO: 12epoch:train:1901-2375batch: iter_time=1.755e-04, forward_time=0.050, loss_ctc=37.839, loss_transducer=17.374, loss=7.181, backward_time=0.045, optim_step_time=0.033, optim0_lr0=7.747e-04, train_time=0.557
[gpu11] 2022-06-08 02:07:49,373 (trainer:672) INFO: 12epoch:train:2376-2850batch: iter_time=1.812e-04, forward_time=0.049, loss_ctc=37.165, loss_transducer=17.117, loss=7.067, backward_time=0.045, optim_step_time=0.033, optim0_lr0=7.730e-04, train_time=0.554
[gpu11] 2022-06-08 02:08:55,073 (trainer:672) INFO: 12epoch:train:2851-3325batch: iter_time=1.785e-04, forward_time=0.049, loss_ctc=38.643, loss_transducer=17.681, loss=7.318, backward_time=0.045, optim_step_time=0.033, optim0_lr0=7.713e-04, train_time=0.553
[gpu11] 2022-06-08 02:10:00,307 (trainer:672) INFO: 12epoch:train:3326-3800batch: iter_time=1.714e-04, forward_time=0.049, loss_ctc=37.811, loss_transducer=17.420, loss=7.191, backward_time=0.045, optim_step_time=0.033, optim0_lr0=7.696e-04, train_time=0.549
[gpu11] 2022-06-08 02:11:06,922 (trainer:672) INFO: 12epoch:train:3801-4275batch: iter_time=1.623e-04, forward_time=0.050, loss_ctc=39.081, loss_transducer=17.988, loss=7.428, backward_time=0.046, optim_step_time=0.033, optim0_lr0=7.679e-04, train_time=0.561
[gpu11] 2022-06-08 02:12:12,972 (trainer:672) INFO: 12epoch:train:4276-4750batch: iter_time=1.566e-04, forward_time=0.050, loss_ctc=39.486, loss_transducer=18.212, loss=7.514, backward_time=0.046, optim_step_time=0.033, optim0_lr0=7.662e-04, train_time=0.556
[gpu11] 2022-06-08 02:13:18,906 (trainer:672) INFO: 12epoch:train:4751-5225batch: iter_time=1.611e-04, forward_time=0.050, loss_ctc=38.544, loss_transducer=17.667, loss=7.308, backward_time=0.047, optim_step_time=0.033, optim0_lr0=7.645e-04, train_time=0.555
[gpu11] 2022-06-08 02:14:25,394 (trainer:672) INFO: 12epoch:train:5226-5700batch: iter_time=1.612e-04, forward_time=0.050, loss_ctc=38.198, loss_transducer=17.793, loss=7.313, backward_time=0.046, optim_step_time=0.033, optim0_lr0=7.629e-04, train_time=0.559
[gpu11] 2022-06-08 02:15:31,140 (trainer:672) INFO: 12epoch:train:5701-6175batch: iter_time=1.567e-04, forward_time=0.050, loss_ctc=37.642, loss_transducer=17.547, loss=7.210, backward_time=0.046, optim_step_time=0.033, optim0_lr0=7.612e-04, train_time=0.553
[gpu11] 2022-06-08 02:16:37,958 (trainer:672) INFO: 12epoch:train:6176-6650batch: iter_time=1.594e-04, forward_time=0.050, loss_ctc=38.278, loss_transducer=17.836, loss=7.330, backward_time=0.047, optim_step_time=0.033, optim0_lr0=7.596e-04, train_time=0.563
[gpu11] 2022-06-08 02:17:44,382 (trainer:672) INFO: 12epoch:train:6651-7125batch: iter_time=1.611e-04, forward_time=0.050, loss_ctc=38.922, loss_transducer=17.973, loss=7.412, backward_time=0.046, optim_step_time=0.033, optim0_lr0=7.580e-04, train_time=0.559
[gpu11] 2022-06-08 02:18:50,245 (trainer:672) INFO: 12epoch:train:7126-7600batch: iter_time=1.533e-04, forward_time=0.050, loss_ctc=37.127, loss_transducer=17.365, loss=7.126, backward_time=0.046, optim_step_time=0.033, optim0_lr0=7.564e-04, train_time=0.554
[gpu11] 2022-06-08 02:19:56,111 (trainer:672) INFO: 12epoch:train:7601-8075batch: iter_time=1.640e-04, forward_time=0.050, loss_ctc=39.211, loss_transducer=17.991, loss=7.439, backward_time=0.045, optim_step_time=0.033, optim0_lr0=7.548e-04, train_time=0.555
[gpu11] 2022-06-08 02:21:01,922 (trainer:672) INFO: 12epoch:train:8076-8550batch: iter_time=1.731e-04, forward_time=0.049, loss_ctc=37.380, loss_transducer=17.213, loss=7.107, backward_time=0.045, optim_step_time=0.033, optim0_lr0=7.532e-04, train_time=0.554
[gpu11] 2022-06-08 02:22:07,767 (trainer:672) INFO: 12epoch:train:8551-9025batch: iter_time=1.658e-04, forward_time=0.049, loss_ctc=38.333, loss_transducer=17.809, loss=7.327, backward_time=0.045, optim_step_time=0.033, optim0_lr0=7.516e-04, train_time=0.554
[gpu11] 2022-06-08 02:23:13,999 (trainer:672) INFO: 12epoch:train:9026-9500batch: iter_time=1.615e-04, forward_time=0.049, loss_ctc=37.435, loss_transducer=17.535, loss=7.191, backward_time=0.045, optim_step_time=0.033, optim0_lr0=7.500e-04, train_time=0.557
[gpu11] 2022-06-08 02:23:29,346 (trainer:328) INFO: 12epoch results: [train] iter_time=2.193e-04, forward_time=0.050, loss_ctc=37.980, loss_transducer=17.502, loss=7.224, backward_time=0.046, optim_step_time=0.033, optim0_lr0=7.655e-04, train_time=0.556, time=22 minutes and 1.14 seconds, total_count=114036, gpu_max_cached_mem_GB=15.090, [valid] loss_ctc=35.245, cer_ctc=0.179, loss_transducer=14.967, cer_transducer=nan, wer_transducer=nan, loss=25.541, time=14.64 seconds, total_count=3372, gpu_max_cached_mem_GB=15.090
[gpu11] 2022-06-08 02:23:32,382 (trainer:376) INFO: The best model has been updated: valid.loss
[gpu11] 2022-06-08 02:23:32,414 (trainer:430) INFO: The model files were removed: exp/asr_IndEng188h-rnnt-600bpe/9epoch.pth
[gpu11] 2022-06-08 02:23:32,414 (trainer:262) INFO: 13/20epoch started. Estimated time to finish: 2 hours, 58 minutes and 20.58 seconds
[gpu11] 2022-06-08 02:24:38,958 (trainer:672) INFO: 13epoch:train:1-475batch: iter_time=0.001, forward_time=0.050, loss_ctc=36.097, loss_transducer=15.987, loss=6.704, backward_time=0.046, optim_step_time=0.033, optim0_lr0=7.485e-04, train_time=0.560
[gpu11] 2022-06-08 02:25:44,668 (trainer:672) INFO: 13epoch:train:476-950batch: iter_time=1.846e-04, forward_time=0.049, loss_ctc=36.352, loss_transducer=16.075, loss=6.745, backward_time=0.045, optim_step_time=0.033, optim0_lr0=7.469e-04, train_time=0.554
[gpu11] 2022-06-08 02:26:50,010 (trainer:672) INFO: 13epoch:train:951-1425batch: iter_time=1.731e-04, forward_time=0.049, loss_ctc=35.375, loss_transducer=15.783, loss=6.599, backward_time=0.045, optim_step_time=0.033, optim0_lr0=7.454e-04, train_time=0.550
[gpu11] 2022-06-08 02:27:55,851 (trainer:672) INFO: 13epoch:train:1426-1900batch: iter_time=1.620e-04, forward_time=0.049, loss_ctc=36.294, loss_transducer=16.272, loss=6.790, backward_time=0.046, optim_step_time=0.033, optim0_lr0=7.438e-04, train_time=0.554
[gpu11] 2022-06-08 02:29:01,551 (trainer:672) INFO: 13epoch:train:1901-2375batch: iter_time=1.607e-04, forward_time=0.049, loss_ctc=35.280, loss_transducer=15.932, loss=6.629, backward_time=0.045, optim_step_time=0.033, optim0_lr0=7.423e-04, train_time=0.554
[gpu11] 2022-06-08 02:30:07,822 (trainer:672) INFO: 13epoch:train:2376-2850batch: iter_time=1.659e-04, forward_time=0.049, loss_ctc=38.200, loss_transducer=17.205, loss=7.166, backward_time=0.046, optim_step_time=0.033, optim0_lr0=7.408e-04, train_time=0.557
[gpu11] 2022-06-08 02:31:13,341 (trainer:672) INFO: 13epoch:train:2851-3325batch: iter_time=1.666e-04, forward_time=0.049, loss_ctc=35.524, loss_transducer=15.990, loss=6.662, backward_time=0.045, optim_step_time=0.033, optim0_lr0=7.393e-04, train_time=0.552
[gpu11] 2022-06-08 02:32:19,329 (trainer:672) INFO: 13epoch:train:3326-3800batch: iter_time=1.584e-04, forward_time=0.049, loss_ctc=36.124, loss_transducer=16.335, loss=6.793, backward_time=0.045, optim_step_time=0.033, optim0_lr0=7.378e-04, train_time=0.555
[gpu11] 2022-06-08 02:33:24,523 (trainer:672) INFO: 13epoch:train:3801-4275batch: iter_time=1.744e-04, forward_time=0.049, loss_ctc=35.410, loss_transducer=15.943, loss=6.642, backward_time=0.045, optim_step_time=0.033, optim0_lr0=7.363e-04, train_time=0.549
[gpu11] 2022-06-08 02:34:30,650 (trainer:672) INFO: 13epoch:train:4276-4750batch: iter_time=1.600e-04, forward_time=0.050, loss_ctc=34.305, loss_transducer=15.683, loss=6.494, backward_time=0.046, optim_step_time=0.033, optim0_lr0=7.349e-04, train_time=0.557
[gpu11] 2022-06-08 02:35:35,947 (trainer:672) INFO: 13epoch:train:4751-5225batch: iter_time=1.576e-04, forward_time=0.049, loss_ctc=36.874, loss_transducer=16.614, loss=6.919, backward_time=0.046, optim_step_time=0.033, optim0_lr0=7.334e-04, train_time=0.550
[gpu11] 2022-06-08 02:36:41,951 (trainer:672) INFO: 13epoch:train:5226-5700batch: iter_time=1.636e-04, forward_time=0.050, loss_ctc=36.485, loss_transducer=16.637, loss=6.896, backward_time=0.046, optim_step_time=0.033, optim0_lr0=7.319e-04, train_time=0.555
[gpu11] 2022-06-08 02:37:47,824 (trainer:672) INFO: 13epoch:train:5701-6175batch: iter_time=1.660e-04, forward_time=0.049, loss_ctc=36.221, loss_transducer=16.322, loss=6.797, backward_time=0.045, optim_step_time=0.033, optim0_lr0=7.305e-04, train_time=0.555
[gpu11] 2022-06-08 02:38:53,926 (trainer:672) INFO: 13epoch:train:6176-6650batch: iter_time=1.565e-04, forward_time=0.050, loss_ctc=35.808, loss_transducer=16.272, loss=6.754, backward_time=0.046, optim_step_time=0.033, optim0_lr0=7.290e-04, train_time=0.556
[gpu11] 2022-06-08 02:39:59,260 (trainer:672) INFO: 13epoch:train:6651-7125batch: iter_time=1.603e-04, forward_time=0.049, loss_ctc=34.566, loss_transducer=15.633, loss=6.501, backward_time=0.046, optim_step_time=0.033, optim0_lr0=7.276e-04, train_time=0.550
[gpu11] 2022-06-08 02:41:05,051 (trainer:672) INFO: 13epoch:train:7126-7600batch: iter_time=1.680e-04, forward_time=0.049, loss_ctc=34.838, loss_transducer=16.034, loss=6.621, backward_time=0.045, optim_step_time=0.033, optim0_lr0=7.262e-04, train_time=0.553
[gpu11] 2022-06-08 02:42:10,764 (trainer:672) INFO: 13epoch:train:7601-8075batch: iter_time=1.754e-04, forward_time=0.049, loss_ctc=33.955, loss_transducer=15.595, loss=6.445, backward_time=0.045, optim_step_time=0.033, optim0_lr0=7.248e-04, train_time=0.554
[gpu11] 2022-06-08 02:43:16,751 (trainer:672) INFO: 13epoch:train:8076-8550batch: iter_time=1.535e-04, forward_time=0.050, loss_ctc=35.597, loss_transducer=16.233, loss=6.728, backward_time=0.046, optim_step_time=0.033, optim0_lr0=7.234e-04, train_time=0.555
[gpu11] 2022-06-08 02:44:23,123 (trainer:672) INFO: 13epoch:train:8551-9025batch: iter_time=1.560e-04, forward_time=0.050, loss_ctc=35.291, loss_transducer=16.288, loss=6.719, backward_time=0.046, optim_step_time=0.033, optim0_lr0=7.219e-04, train_time=0.559
[gpu11] 2022-06-08 02:45:29,510 (trainer:672) INFO: 13epoch:train:9026-9500batch: iter_time=1.561e-04, forward_time=0.050, loss_ctc=34.799, loss_transducer=15.686, loss=6.531, backward_time=0.045, optim_step_time=0.033, optim0_lr0=7.206e-04, train_time=0.558
[gpu11] 2022-06-08 02:45:44,975 (trainer:328) INFO: 13epoch results: [train] iter_time=2.217e-04, forward_time=0.049, loss_ctc=35.659, loss_transducer=16.123, loss=6.705, backward_time=0.045, optim_step_time=0.033, optim0_lr0=7.343e-04, train_time=0.554, time=21 minutes and 57.95 seconds, total_count=123539, gpu_max_cached_mem_GB=15.090, [valid] loss_ctc=33.541, cer_ctc=0.180, loss_transducer=13.883, cer_transducer=nan, wer_transducer=nan, loss=23.945, time=14.61 seconds, total_count=3653, gpu_max_cached_mem_GB=15.090
[gpu11] 2022-06-08 02:45:48,031 (trainer:376) INFO: The best model has been updated: valid.loss
[gpu11] 2022-06-08 02:45:48,064 (trainer:430) INFO: The model files were removed: exp/asr_IndEng188h-rnnt-600bpe/10epoch.pth
[gpu11] 2022-06-08 02:45:48,064 (trainer:262) INFO: 14/20epoch started. Estimated time to finish: 2 hours, 36 minutes and 1.97 seconds
[gpu11] 2022-06-08 02:46:54,342 (trainer:672) INFO: 14epoch:train:1-475batch: iter_time=0.001, forward_time=0.049, loss_ctc=34.093, loss_transducer=14.731, loss=6.240, backward_time=0.045, optim_step_time=0.033, optim0_lr0=7.192e-04, train_time=0.559
[gpu11] 2022-06-08 02:47:59,581 (trainer:672) INFO: 14epoch:train:476-950batch: iter_time=1.791e-04, forward_time=0.049, loss_ctc=32.246, loss_transducer=13.930, loss=5.901, backward_time=0.045, optim_step_time=0.033, optim0_lr0=7.178e-04, train_time=0.549
[gpu11] 2022-06-08 02:49:05,007 (trainer:672) INFO: 14epoch:train:951-1425batch: iter_time=1.761e-04, forward_time=0.049, loss_ctc=33.364, loss_transducer=14.718, loss=6.182, backward_time=0.045, optim_step_time=0.033, optim0_lr0=7.164e-04, train_time=0.550
[gpu11] 2022-06-08 02:50:11,036 (trainer:672) INFO: 14epoch:train:1426-1900batch: iter_time=1.712e-04, forward_time=0.049, loss_ctc=32.678, loss_transducer=14.315, loss=6.030, backward_time=0.045, optim_step_time=0.033, optim0_lr0=7.151e-04, train_time=0.555
[gpu11] 2022-06-08 02:51:17,550 (trainer:672) INFO: 14epoch:train:1901-2375batch: iter_time=1.576e-04, forward_time=0.050, loss_ctc=34.075, loss_transducer=14.903, loss=6.281, backward_time=0.047, optim_step_time=0.033, optim0_lr0=7.137e-04, train_time=0.560
[gpu11] 2022-06-08 02:52:23,538 (trainer:672) INFO: 14epoch:train:2376-2850batch: iter_time=1.593e-04, forward_time=0.050, loss_ctc=34.423, loss_transducer=15.164, loss=6.373, backward_time=0.046, optim_step_time=0.033, optim0_lr0=7.124e-04, train_time=0.556
[gpu11] 2022-06-08 02:53:29,949 (trainer:672) INFO: 14epoch:train:2851-3325batch: iter_time=1.579e-04, forward_time=0.050, loss_ctc=34.277, loss_transducer=15.025, loss=6.327, backward_time=0.046, optim_step_time=0.033, optim0_lr0=7.110e-04, train_time=0.559
[gpu11] 2022-06-08 02:54:35,134 (trainer:672) INFO: 14epoch:train:3326-3800batch: iter_time=1.724e-04, forward_time=0.049, loss_ctc=33.461, loss_transducer=14.991, loss=6.257, backward_time=0.045, optim_step_time=0.033, optim0_lr0=7.097e-04, train_time=0.549
[gpu11] 2022-06-08 02:55:40,983 (trainer:672) INFO: 14epoch:train:3801-4275batch: iter_time=1.671e-04, forward_time=0.050, loss_ctc=33.304, loss_transducer=14.800, loss=6.198, backward_time=0.045, optim_step_time=0.033, optim0_lr0=7.084e-04, train_time=0.554
[gpu11] 2022-06-08 02:56:47,041 (trainer:672) INFO: 14epoch:train:4276-4750batch: iter_time=1.559e-04, forward_time=0.050, loss_ctc=34.393, loss_transducer=15.259, loss=6.394, backward_time=0.046, optim_step_time=0.033, optim0_lr0=7.071e-04, train_time=0.556
[gpu11] 2022-06-08 02:57:51,897 (trainer:672) INFO: 14epoch:train:4751-5225batch: iter_time=1.672e-04, forward_time=0.049, loss_ctc=32.545, loss_transducer=14.528, loss=6.073, backward_time=0.045, optim_step_time=0.033, optim0_lr0=7.058e-04, train_time=0.546
[gpu11] 2022-06-08 02:58:58,139 (trainer:672) INFO: 14epoch:train:5226-5700batch: iter_time=1.565e-04, forward_time=0.050, loss_ctc=31.302, loss_transducer=14.135, loss=5.881, backward_time=0.046, optim_step_time=0.033, optim0_lr0=7.044e-04, train_time=0.557
[gpu11] 2022-06-08 03:00:04,706 (trainer:672) INFO: 14epoch:train:5701-6175batch: iter_time=1.550e-04, forward_time=0.050, loss_ctc=34.796, loss_transducer=15.585, loss=6.506, backward_time=0.047, optim_step_time=0.033, optim0_lr0=7.032e-04, train_time=0.561
[gpu11] 2022-06-08 03:01:09,998 (trainer:672) INFO: 14epoch:train:6176-6650batch: iter_time=1.579e-04, forward_time=0.049, loss_ctc=32.914, loss_transducer=14.759, loss=6.158, backward_time=0.046, optim_step_time=0.033, optim0_lr0=7.019e-04, train_time=0.550
[gpu11] 2022-06-08 03:02:15,807 (trainer:672) INFO: 14epoch:train:6651-7125batch: iter_time=1.586e-04, forward_time=0.049, loss_ctc=31.884, loss_transducer=14.291, loss=5.964, backward_time=0.047, optim_step_time=0.033, optim0_lr0=7.006e-04, train_time=0.554
[gpu11] 2022-06-08 03:03:23,415 (trainer:672) INFO: 14epoch:train:7126-7600batch: iter_time=1.616e-04, forward_time=0.050, loss_ctc=33.074, loss_transducer=15.188, loss=6.278, backward_time=0.050, optim_step_time=0.033, optim0_lr0=6.993e-04, train_time=0.569
[gpu11] 2022-06-08 03:04:29,710 (trainer:672) INFO: 14epoch:train:7601-8075batch: iter_time=1.601e-04, forward_time=0.050, loss_ctc=35.193, loss_transducer=15.831, loss=6.597, backward_time=0.047, optim_step_time=0.033, optim0_lr0=6.981e-04, train_time=0.559
[gpu11] 2022-06-08 03:05:36,069 (trainer:672) INFO: 14epoch:train:8076-8550batch: iter_time=1.651e-04, forward_time=0.049, loss_ctc=33.902, loss_transducer=15.252, loss=6.356, backward_time=0.045, optim_step_time=0.033, optim0_lr0=6.968e-04, train_time=0.558
[gpu11] 2022-06-08 03:06:41,806 (trainer:672) INFO: 14epoch:train:8551-9025batch: iter_time=1.693e-04, forward_time=0.049, loss_ctc=34.544, loss_transducer=15.686, loss=6.512, backward_time=0.045, optim_step_time=0.033, optim0_lr0=6.955e-04, train_time=0.553
[gpu11] 2022-06-08 03:07:47,400 (trainer:672) INFO: 14epoch:train:9026-9500batch: iter_time=1.693e-04, forward_time=0.049, loss_ctc=34.716, loss_transducer=15.665, loss=6.520, backward_time=0.045, optim_step_time=0.032, optim0_lr0=6.943e-04, train_time=0.552
[gpu11] 2022-06-08 03:08:03,023 (trainer:328) INFO: 14epoch results: [train] iter_time=2.112e-04, forward_time=0.050, loss_ctc=33.541, loss_transducer=14.930, loss=6.248, backward_time=0.046, optim_step_time=0.033, optim0_lr0=7.065e-04, train_time=0.555, time=22 minutes and 0.05 seconds, total_count=133042, gpu_max_cached_mem_GB=15.090, [valid] loss_ctc=31.290, cer_ctc=0.160, loss_transducer=13.050, cer_transducer=nan, wer_transducer=nan, loss=22.437, time=14.9 seconds, total_count=3934, gpu_max_cached_mem_GB=15.090
[gpu11] 2022-06-08 03:08:06,166 (trainer:376) INFO: The best model has been updated: valid.loss
[gpu11] 2022-06-08 03:08:06,199 (trainer:430) INFO: The model files were removed: exp/asr_IndEng188h-rnnt-600bpe/11epoch.pth
[gpu11] 2022-06-08 03:08:06,199 (trainer:262) INFO: 15/20epoch started. Estimated time to finish: 2 hours, 13 minutes and 44.85 seconds
[gpu11] 2022-06-08 03:09:13,014 (trainer:672) INFO: 15epoch:train:1-475batch: iter_time=0.001, forward_time=0.050, loss_ctc=30.571, loss_transducer=13.008, loss=5.545, backward_time=0.046, optim_step_time=0.033, optim0_lr0=6.931e-04, train_time=0.563
[gpu11] 2022-06-08 03:10:18,742 (trainer:672) INFO: 15epoch:train:476-950batch: iter_time=1.892e-04, forward_time=0.049, loss_ctc=31.156, loss_transducer=13.344, loss=5.673, backward_time=0.045, optim_step_time=0.033, optim0_lr0=6.918e-04, train_time=0.553
[gpu11] 2022-06-08 03:11:25,200 (trainer:672) INFO: 15epoch:train:951-1425batch: iter_time=1.870e-04, forward_time=0.050, loss_ctc=31.862, loss_transducer=13.960, loss=5.880, backward_time=0.045, optim_step_time=0.033, optim0_lr0=6.906e-04, train_time=0.559
[gpu11] 2022-06-08 03:12:31,069 (trainer:672) INFO: 15epoch:train:1426-1900batch: iter_time=1.862e-04, forward_time=0.049, loss_ctc=30.701, loss_transducer=13.454, loss=5.666, backward_time=0.045, optim_step_time=0.033, optim0_lr0=6.894e-04, train_time=0.554
[gpu11] 2022-06-08 03:13:36,605 (trainer:672) INFO: 15epoch:train:1901-2375batch: iter_time=1.810e-04, forward_time=0.049, loss_ctc=33.630, loss_transducer=14.513, loss=6.150, backward_time=0.045, optim_step_time=0.033, optim0_lr0=6.882e-04, train_time=0.552
[gpu11] 2022-06-08 03:14:42,765 (trainer:672) INFO: 15epoch:train:2376-2850batch: iter_time=1.803e-04, forward_time=0.050, loss_ctc=32.783, loss_transducer=14.177, loss=6.003, backward_time=0.045, optim_step_time=0.033, optim0_lr0=6.870e-04, train_time=0.557
[gpu11] 2022-06-08 03:15:48,878 (trainer:672) INFO: 15epoch:train:2851-3325batch: iter_time=1.813e-04, forward_time=0.050, loss_ctc=32.448, loss_transducer=14.199, loss=5.983, backward_time=0.045, optim_step_time=0.033, optim0_lr0=6.858e-04, train_time=0.556
[gpu11] 2022-06-08 03:16:54,002 (trainer:672) INFO: 15epoch:train:3326-3800batch: iter_time=1.767e-04, forward_time=0.049, loss_ctc=32.110, loss_transducer=14.129, loss=5.941, backward_time=0.045, optim_step_time=0.033, optim0_lr0=6.846e-04, train_time=0.548
[gpu11] 2022-06-08 03:18:00,449 (trainer:672) INFO: 15epoch:train:3801-4275batch: iter_time=1.684e-04, forward_time=0.050, loss_ctc=31.502, loss_transducer=13.765, loss=5.804, backward_time=0.046, optim_step_time=0.033, optim0_lr0=6.834e-04, train_time=0.560
[gpu11] 2022-06-08 03:19:06,418 (trainer:672) INFO: 15epoch:train:4276-4750batch: iter_time=1.831e-04, forward_time=0.050, loss_ctc=32.501, loss_transducer=14.283, loss=6.008, backward_time=0.045, optim_step_time=0.033, optim0_lr0=6.822e-04, train_time=0.555
[gpu11] 2022-06-08 03:20:11,998 (trainer:672) INFO: 15epoch:train:4751-5225batch: iter_time=1.840e-04, forward_time=0.050, loss_ctc=32.077, loss_transducer=14.156, loss=5.945, backward_time=0.045, optim_step_time=0.033, optim0_lr0=6.810e-04, train_time=0.552
[gpu11] 2022-06-08 03:21:17,580 (trainer:672) INFO: 15epoch:train:5226-5700batch: iter_time=1.794e-04, forward_time=0.049, loss_ctc=29.199, loss_transducer=12.888, loss=5.412, backward_time=0.045, optim_step_time=0.033, optim0_lr0=6.799e-04, train_time=0.552
[gpu11] 2022-06-08 03:22:23,857 (trainer:672) INFO: 15epoch:train:5701-6175batch: iter_time=1.759e-04, forward_time=0.050, loss_ctc=30.068, loss_transducer=13.141, loss=5.540, backward_time=0.045, optim_step_time=0.033, optim0_lr0=6.787e-04, train_time=0.559
[gpu11] 2022-06-08 03:23:30,040 (trainer:672) INFO: 15epoch:train:6176-6650batch: iter_time=1.869e-04, forward_time=0.050, loss_ctc=31.646, loss_transducer=14.063, loss=5.889, backward_time=0.046, optim_step_time=0.033, optim0_lr0=6.775e-04, train_time=0.557
[gpu11] 2022-06-08 03:24:35,554 (trainer:672) INFO: 15epoch:train:6651-7125batch: iter_time=1.661e-04, forward_time=0.050, loss_ctc=31.765, loss_transducer=14.127, loss=5.914, backward_time=0.046, optim_step_time=0.033, optim0_lr0=6.764e-04, train_time=0.552
[gpu11] 2022-06-08 03:25:42,657 (trainer:672) INFO: 15epoch:train:7126-7600batch: iter_time=1.765e-04, forward_time=0.051, loss_ctc=31.605, loss_transducer=13.930, loss=5.853, backward_time=0.046, optim_step_time=0.033, optim0_lr0=6.752e-04, train_time=0.565
[gpu11] 2022-06-08 03:26:49,043 (trainer:672) INFO: 15epoch:train:7601-8075batch: iter_time=1.843e-04, forward_time=0.050, loss_ctc=32.964, loss_transducer=14.763, loss=6.163, backward_time=0.045, optim_step_time=0.033, optim0_lr0=6.741e-04, train_time=0.559
[gpu11] 2022-06-08 03:27:54,461 (trainer:672) INFO: 15epoch:train:8076-8550batch: iter_time=1.879e-04, forward_time=0.049, loss_ctc=30.906, loss_transducer=13.835, loss=5.777, backward_time=0.045, optim_step_time=0.033, optim0_lr0=6.730e-04, train_time=0.550
[gpu11] 2022-06-08 03:29:00,971 (trainer:672) INFO: 15epoch:train:8551-9025batch: iter_time=1.845e-04, forward_time=0.050, loss_ctc=32.954, loss_transducer=14.731, loss=6.154, backward_time=0.045, optim_step_time=0.033, optim0_lr0=6.718e-04, train_time=0.560
[gpu11] 2022-06-08 03:30:07,104 (trainer:672) INFO: 15epoch:train:9026-9500batch: iter_time=1.596e-04, forward_time=0.050, loss_ctc=32.541, loss_transducer=14.459, loss=6.055, backward_time=0.045, optim_step_time=0.033, optim0_lr0=6.707e-04, train_time=0.556
[gpu11] 2022-06-08 03:30:22,491 (trainer:328) INFO: 15epoch results: [train] iter_time=2.413e-04, forward_time=0.050, loss_ctc=31.734, loss_transducer=13.940, loss=5.865, backward_time=0.045, optim_step_time=0.033, optim0_lr0=6.817e-04, train_time=0.556, time=22 minutes and 1.72 seconds, total_count=142545, gpu_max_cached_mem_GB=15.090, [valid] loss_ctc=30.179, cer_ctc=0.153, loss_transducer=12.707, cer_transducer=nan, wer_transducer=nan, loss=21.760, time=14.57 seconds, total_count=4215, gpu_max_cached_mem_GB=15.090
[gpu11] 2022-06-08 03:30:25,578 (trainer:376) INFO: The best model has been updated: valid.loss
[gpu11] 2022-06-08 03:30:25,610 (trainer:430) INFO: The model files were removed: exp/asr_IndEng188h-rnnt-600bpe/12epoch.pth
[gpu11] 2022-06-08 03:30:25,610 (trainer:262) INFO: 16/20epoch started. Estimated time to finish: 1 hour, 51 minutes and 28.02 seconds
[gpu11] 2022-06-08 03:31:31,765 (trainer:672) INFO: 16epoch:train:1-475batch: iter_time=0.001, forward_time=0.049, loss_ctc=29.803, loss_transducer=12.474, loss=5.354, backward_time=0.045, optim_step_time=0.033, optim0_lr0=6.696e-04, train_time=0.557
[gpu11] 2022-06-08 03:32:37,629 (trainer:672) INFO: 16epoch:train:476-950batch: iter_time=1.886e-04, forward_time=0.049, loss_ctc=29.419, loss_transducer=12.406, loss=5.308, backward_time=0.045, optim_step_time=0.033, optim0_lr0=6.685e-04, train_time=0.555
[gpu11] 2022-06-08 03:33:43,771 (trainer:672) INFO: 16epoch:train:951-1425batch: iter_time=1.849e-04, forward_time=0.049, loss_ctc=29.696, loss_transducer=12.582, loss=5.373, backward_time=0.045, optim_step_time=0.033, optim0_lr0=6.674e-04, train_time=0.556
[gpu11] 2022-06-08 03:34:50,244 (trainer:672) INFO: 16epoch:train:1426-1900batch: iter_time=1.736e-04, forward_time=0.050, loss_ctc=30.176, loss_transducer=12.938, loss=5.498, backward_time=0.046, optim_step_time=0.033, optim0_lr0=6.663e-04, train_time=0.559
[gpu11] 2022-06-08 03:35:56,632 (trainer:672) INFO: 16epoch:train:1901-2375batch: iter_time=1.880e-04, forward_time=0.050, loss_ctc=29.167, loss_transducer=12.664, loss=5.353, backward_time=0.045, optim_step_time=0.033, optim0_lr0=6.652e-04, train_time=0.560
[gpu11] 2022-06-08 03:37:02,068 (trainer:672) INFO: 16epoch:train:2376-2850batch: iter_time=1.856e-04, forward_time=0.049, loss_ctc=29.621, loss_transducer=12.812, loss=5.425, backward_time=0.045, optim_step_time=0.033, optim0_lr0=6.641e-04, train_time=0.551
[gpu11] 2022-06-08 03:38:07,259 (trainer:672) INFO: 16epoch:train:2851-3325batch: iter_time=1.826e-04, forward_time=0.049, loss_ctc=31.060, loss_transducer=13.442, loss=5.690, backward_time=0.045, optim_step_time=0.033, optim0_lr0=6.630e-04, train_time=0.549
[gpu11] 2022-06-08 03:39:14,334 (trainer:672) INFO: 16epoch:train:3326-3800batch: iter_time=1.787e-04, forward_time=0.051, loss_ctc=30.635, loss_transducer=13.392, loss=5.646, backward_time=0.045, optim_step_time=0.033, optim0_lr0=6.619e-04, train_time=0.564
[gpu11] 2022-06-08 03:40:19,955 (trainer:672) INFO: 16epoch:train:3801-4275batch: iter_time=1.673e-04, forward_time=0.050, loss_ctc=30.289, loss_transducer=13.072, loss=5.540, backward_time=0.046, optim_step_time=0.033, optim0_lr0=6.609e-04, train_time=0.553
[gpu11] 2022-06-08 03:41:26,073 (trainer:672) INFO: 16epoch:train:4276-4750batch: iter_time=1.766e-04, forward_time=0.050, loss_ctc=29.015, loss_transducer=12.497, loss=5.300, backward_time=0.045, optim_step_time=0.033, optim0_lr0=6.598e-04, train_time=0.556
[gpu11] 2022-06-08 03:42:32,739 (trainer:672) INFO: 16epoch:train:4751-5225batch: iter_time=1.971e-04, forward_time=0.050, loss_ctc=30.896, loss_transducer=13.420, loss=5.672, backward_time=0.045, optim_step_time=0.033, optim0_lr0=6.587e-04, train_time=0.561
[gpu11] 2022-06-08 03:43:38,260 (trainer:672) INFO: 16epoch:train:5226-5700batch: iter_time=1.683e-04, forward_time=0.050, loss_ctc=29.302, loss_transducer=12.961, loss=5.438, backward_time=0.045, optim_step_time=0.033, optim0_lr0=6.577e-04, train_time=0.551
[gpu11] 2022-06-08 03:44:44,115 (trainer:672) INFO: 16epoch:train:5701-6175batch: iter_time=1.706e-04, forward_time=0.050, loss_ctc=30.041, loss_transducer=13.232, loss=5.561, backward_time=0.045, optim_step_time=0.033, optim0_lr0=6.566e-04, train_time=0.555
[gpu11] 2022-06-08 03:45:50,460 (trainer:672) INFO: 16epoch:train:6176-6650batch: iter_time=1.839e-04, forward_time=0.050, loss_ctc=31.543, loss_transducer=13.782, loss=5.811, backward_time=0.046, optim_step_time=0.033, optim0_lr0=6.556e-04, train_time=0.559
[gpu11] 2022-06-08 03:46:57,475 (trainer:672) INFO: 16epoch:train:6651-7125batch: iter_time=1.779e-04, forward_time=0.051, loss_ctc=31.417, loss_transducer=13.742, loss=5.792, backward_time=0.046, optim_step_time=0.033, optim0_lr0=6.545e-04, train_time=0.564
[gpu11] 2022-06-08 03:48:03,208 (trainer:672) INFO: 16epoch:train:7126-7600batch: iter_time=1.831e-04, forward_time=0.050, loss_ctc=30.584, loss_transducer=13.551, loss=5.681, backward_time=0.045, optim_step_time=0.033, optim0_lr0=6.535e-04, train_time=0.553
[gpu11] 2022-06-08 03:49:09,562 (trainer:672) INFO: 16epoch:train:7601-8075batch: iter_time=1.815e-04, forward_time=0.050, loss_ctc=30.705, loss_transducer=13.442, loss=5.663, backward_time=0.045, optim_step_time=0.033, optim0_lr0=6.525e-04, train_time=0.559
[gpu11] 2022-06-08 03:50:15,306 (trainer:672) INFO: 16epoch:train:8076-8550batch: iter_time=1.759e-04, forward_time=0.049, loss_ctc=29.921, loss_transducer=13.034, loss=5.503, backward_time=0.045, optim_step_time=0.033, optim0_lr0=6.514e-04, train_time=0.554
[gpu11] 2022-06-08 03:51:21,075 (trainer:672) INFO: 16epoch:train:8551-9025batch: iter_time=1.878e-04, forward_time=0.050, loss_ctc=30.635, loss_transducer=13.352, loss=5.636, backward_time=0.045, optim_step_time=0.033, optim0_lr0=6.504e-04, train_time=0.553
[gpu11] 2022-06-08 03:52:27,005 (trainer:672) INFO: 16epoch:train:9026-9500batch: iter_time=1.767e-04, forward_time=0.050, loss_ctc=28.913, loss_transducer=12.753, loss=5.357, backward_time=0.045, optim_step_time=0.033, optim0_lr0=6.494e-04, train_time=0.555
[gpu11] 2022-06-08 03:52:42,368 (trainer:328) INFO: 16epoch results: [train] iter_time=2.347e-04, forward_time=0.050, loss_ctc=30.144, loss_transducer=13.078, loss=5.530, backward_time=0.045, optim_step_time=0.033, optim0_lr0=6.593e-04, train_time=0.556, time=22 minutes and 2.19 seconds, total_count=152048, gpu_max_cached_mem_GB=15.090, [valid] loss_ctc=29.593, cer_ctc=0.158, loss_transducer=12.332, cer_transducer=nan, wer_transducer=nan, loss=21.210, time=14.57 seconds, total_count=4496, gpu_max_cached_mem_GB=15.090
[gpu11] 2022-06-08 03:52:45,344 (trainer:376) INFO: The best model has been updated: valid.loss
[gpu11] 2022-06-08 03:52:45,376 (trainer:430) INFO: The model files were removed: exp/asr_IndEng188h-rnnt-600bpe/13epoch.pth
[gpu11] 2022-06-08 03:52:45,376 (trainer:262) INFO: 17/20epoch started. Estimated time to finish: 1 hour, 29 minutes and 10.96 seconds
[gpu11] 2022-06-08 03:53:52,098 (trainer:672) INFO: 17epoch:train:1-475batch: iter_time=7.428e-04, forward_time=0.050, loss_ctc=29.041, loss_transducer=12.058, loss=5.193, backward_time=0.046, optim_step_time=0.033, optim0_lr0=6.484e-04, train_time=0.562
[gpu11] 2022-06-08 03:54:57,983 (trainer:672) INFO: 17epoch:train:476-950batch: iter_time=1.658e-04, forward_time=0.049, loss_ctc=28.264, loss_transducer=11.895, loss=5.093, backward_time=0.046, optim_step_time=0.033, optim0_lr0=6.474e-04, train_time=0.555
[gpu11] 2022-06-08 03:56:03,869 (trainer:672) INFO: 17epoch:train:951-1425batch: iter_time=1.614e-04, forward_time=0.049, loss_ctc=27.244, loss_transducer=11.455, loss=4.907, backward_time=0.046, optim_step_time=0.033, optim0_lr0=6.464e-04, train_time=0.554
[gpu11] 2022-06-08 03:57:09,160 (trainer:672) INFO: 17epoch:train:1426-1900batch: iter_time=1.567e-04, forward_time=0.049, loss_ctc=28.919, loss_transducer=12.193, loss=5.217, backward_time=0.046, optim_step_time=0.033, optim0_lr0=6.454e-04, train_time=0.549
[gpu11] 2022-06-08 03:58:14,630 (trainer:672) INFO: 17epoch:train:1901-2375batch: iter_time=1.665e-04, forward_time=0.049, loss_ctc=28.914, loss_transducer=12.249, loss=5.231, backward_time=0.046, optim_step_time=0.033, optim0_lr0=6.444e-04, train_time=0.551
[gpu11] 2022-06-08 03:59:20,902 (trainer:672) INFO: 17epoch:train:2376-2850batch: iter_time=1.707e-04, forward_time=0.049, loss_ctc=29.080, loss_transducer=12.259, loss=5.246, backward_time=0.045, optim_step_time=0.033, optim0_lr0=6.434e-04, train_time=0.558
[gpu11] 2022-06-08 04:00:26,066 (trainer:672) INFO: 17epoch:train:2851-3325batch: iter_time=1.686e-04, forward_time=0.049, loss_ctc=29.004, loss_transducer=12.269, loss=5.243, backward_time=0.045, optim_step_time=0.033, optim0_lr0=6.424e-04, train_time=0.548
[gpu11] 2022-06-08 04:01:32,430 (trainer:672) INFO: 17epoch:train:3326-3800batch: iter_time=1.541e-04, forward_time=0.050, loss_ctc=29.042, loss_transducer=12.517, loss=5.308, backward_time=0.046, optim_step_time=0.033, optim0_lr0=6.414e-04, train_time=0.558
[gpu11] 2022-06-08 04:02:38,277 (trainer:672) INFO: 17epoch:train:3801-4275batch: iter_time=1.611e-04, forward_time=0.050, loss_ctc=28.774, loss_transducer=12.394, loss=5.257, backward_time=0.045, optim_step_time=0.033, optim0_lr0=6.404e-04, train_time=0.554
[gpu11] 2022-06-08 04:03:44,327 (trainer:672) INFO: 17epoch:train:4276-4750batch: iter_time=1.706e-04, forward_time=0.050, loss_ctc=28.255, loss_transducer=12.117, loss=5.148, backward_time=0.045, optim_step_time=0.033, optim0_lr0=6.395e-04, train_time=0.556
[gpu11] 2022-06-08 04:04:50,716 (trainer:672) INFO: 17epoch:train:4751-5225batch: iter_time=1.689e-04, forward_time=0.050, loss_ctc=29.305, loss_transducer=12.704, loss=5.374, backward_time=0.045, optim_step_time=0.033, optim0_lr0=6.385e-04, train_time=0.559
[gpu11] 2022-06-08 04:05:56,294 (trainer:672) INFO: 17epoch:train:5226-5700batch: iter_time=1.735e-04, forward_time=0.049, loss_ctc=27.616, loss_transducer=11.852, loss=5.034, backward_time=0.045, optim_step_time=0.033, optim0_lr0=6.375e-04, train_time=0.552
[gpu11] 2022-06-08 04:07:01,627 (trainer:672) INFO: 17epoch:train:5701-6175batch: iter_time=1.599e-04, forward_time=0.049, loss_ctc=27.686, loss_transducer=11.924, loss=5.057, backward_time=0.045, optim_step_time=0.033, optim0_lr0=6.366e-04, train_time=0.551
[gpu11] 2022-06-08 04:08:07,652 (trainer:672) INFO: 17epoch:train:6176-6650batch: iter_time=1.534e-04, forward_time=0.050, loss_ctc=30.109, loss_transducer=13.095, loss=5.532, backward_time=0.046, optim_step_time=0.033, optim0_lr0=6.356e-04, train_time=0.555
[gpu11] 2022-06-08 04:09:12,627 (trainer:672) INFO: 17epoch:train:6651-7125batch: iter_time=1.542e-04, forward_time=0.049, loss_ctc=28.842, loss_transducer=12.500, loss=5.288, backward_time=0.045, optim_step_time=0.033, optim0_lr0=6.347e-04, train_time=0.547
[gpu11] 2022-06-08 04:10:19,351 (trainer:672) INFO: 17epoch:train:7126-7600batch: iter_time=1.721e-04, forward_time=0.050, loss_ctc=30.515, loss_transducer=13.196, loss=5.588, backward_time=0.045, optim_step_time=0.033, optim0_lr0=6.337e-04, train_time=0.561
[gpu11] 2022-06-08 04:11:25,172 (trainer:672) INFO: 17epoch:train:7601-8075batch: iter_time=1.736e-04, forward_time=0.049, loss_ctc=28.775, loss_transducer=12.414, loss=5.262, backward_time=0.045, optim_step_time=0.033, optim0_lr0=6.328e-04, train_time=0.555
[gpu11] 2022-06-08 04:12:31,059 (trainer:672) INFO: 17epoch:train:8076-8550batch: iter_time=1.650e-04, forward_time=0.049, loss_ctc=29.812, loss_transducer=13.011, loss=5.489, backward_time=0.045, optim_step_time=0.033, optim0_lr0=6.318e-04, train_time=0.554
[gpu11] 2022-06-08 04:13:36,956 (trainer:672) INFO: 17epoch:train:8551-9025batch: iter_time=1.687e-04, forward_time=0.049, loss_ctc=27.504, loss_transducer=11.954, loss=5.051, backward_time=0.045, optim_step_time=0.033, optim0_lr0=6.309e-04, train_time=0.555
[gpu11] 2022-06-08 04:14:42,387 (trainer:672) INFO: 17epoch:train:9026-9500batch: iter_time=1.660e-04, forward_time=0.049, loss_ctc=27.968, loss_transducer=12.231, loss=5.155, backward_time=0.045, optim_step_time=0.033, optim0_lr0=6.300e-04, train_time=0.550
[gpu11] 2022-06-08 04:14:57,784 (trainer:328) INFO: 17epoch results: [train] iter_time=1.937e-04, forward_time=0.049, loss_ctc=28.726, loss_transducer=12.311, loss=5.232, backward_time=0.045, optim_step_time=0.033, optim0_lr0=6.390e-04, train_time=0.554, time=21 minutes and 57.77 seconds, total_count=161551, gpu_max_cached_mem_GB=15.090, [valid] loss_ctc=28.065, cer_ctc=0.146, loss_transducer=11.880, cer_transducer=nan, wer_transducer=nan, loss=20.300, time=14.63 seconds, total_count=4777, gpu_max_cached_mem_GB=15.090
[gpu11] 2022-06-08 04:15:00,743 (trainer:376) INFO: The best model has been updated: valid.loss
[gpu11] 2022-06-08 04:15:00,772 (trainer:430) INFO: The model files were removed: exp/asr_IndEng188h-rnnt-600bpe/14epoch.pth
[gpu11] 2022-06-08 04:15:00,773 (trainer:262) INFO: 18/20epoch started. Estimated time to finish: 1 hour, 6 minutes and 52.8 seconds
[gpu11] 2022-06-08 04:16:06,850 (trainer:672) INFO: 18epoch:train:1-475batch: iter_time=8.366e-04, forward_time=0.049, loss_ctc=27.207, loss_transducer=11.190, loss=4.838, backward_time=0.047, optim_step_time=0.033, optim0_lr0=6.290e-04, train_time=0.557
[gpu11] 2022-06-08 04:17:12,506 (trainer:672) INFO: 18epoch:train:476-950batch: iter_time=1.579e-04, forward_time=0.049, loss_ctc=25.767, loss_transducer=10.780, loss=4.627, backward_time=0.046, optim_step_time=0.033, optim0_lr0=6.281e-04, train_time=0.552
[gpu11] 2022-06-08 04:18:18,195 (trainer:672) INFO: 18epoch:train:951-1425batch: iter_time=1.571e-04, forward_time=0.049, loss_ctc=27.103, loss_transducer=11.254, loss=4.846, backward_time=0.046, optim_step_time=0.033, optim0_lr0=6.272e-04, train_time=0.553
[gpu11] 2022-06-08 04:19:24,134 (trainer:672) INFO: 18epoch:train:1426-1900batch: iter_time=1.663e-04, forward_time=0.049, loss_ctc=27.673, loss_transducer=11.505, loss=4.952, backward_time=0.045, optim_step_time=0.032, optim0_lr0=6.263e-04, train_time=0.555
[gpu11] 2022-06-08 04:20:29,753 (trainer:672) INFO: 18epoch:train:1901-2375batch: iter_time=1.474e-04, forward_time=0.049, loss_ctc=26.929, loss_transducer=11.054, loss=4.783, backward_time=0.047, optim_step_time=0.033, optim0_lr0=6.254e-04, train_time=0.552
[gpu11] 2022-06-08 04:21:35,603 (trainer:672) INFO: 18epoch:train:2376-2850batch: iter_time=1.514e-04, forward_time=0.050, loss_ctc=28.026, loss_transducer=11.967, loss=5.094, backward_time=0.046, optim_step_time=0.033, optim0_lr0=6.245e-04, train_time=0.555
[gpu11] 2022-06-08 04:22:41,999 (trainer:672) INFO: 18epoch:train:2851-3325batch: iter_time=1.454e-04, forward_time=0.050, loss_ctc=26.553, loss_transducer=11.204, loss=4.793, backward_time=0.045, optim_step_time=0.033, optim0_lr0=6.236e-04, train_time=0.559
[gpu11] 2022-06-08 04:23:48,160 (trainer:672) INFO: 18epoch:train:3326-3800batch: iter_time=1.595e-04, forward_time=0.049, loss_ctc=28.516, loss_transducer=12.033, loss=5.147, backward_time=0.045, optim_step_time=0.032, optim0_lr0=6.227e-04, train_time=0.557
[gpu11] 2022-06-08 04:24:53,950 (trainer:672) INFO: 18epoch:train:3801-4275batch: iter_time=1.485e-04, forward_time=0.049, loss_ctc=27.496, loss_transducer=11.564, loss=4.953, backward_time=0.045, optim_step_time=0.033, optim0_lr0=6.218e-04, train_time=0.554
[gpu11] 2022-06-08 04:25:59,025 (trainer:672) INFO: 18epoch:train:4276-4750batch: iter_time=1.386e-04, forward_time=0.049, loss_ctc=28.546, loss_transducer=12.078, loss=5.160, backward_time=0.045, optim_step_time=0.032, optim0_lr0=6.209e-04, train_time=0.548
[gpu11] 2022-06-08 04:27:04,578 (trainer:672) INFO: 18epoch:train:4751-5225batch: iter_time=1.429e-04, forward_time=0.049, loss_ctc=28.765, loss_transducer=12.247, loss=5.219, backward_time=0.045, optim_step_time=0.033, optim0_lr0=6.200e-04, train_time=0.552
[gpu11] 2022-06-08 04:28:10,522 (trainer:672) INFO: 18epoch:train:5226-5700batch: iter_time=1.401e-04, forward_time=0.049, loss_ctc=29.243, loss_transducer=12.463, loss=5.309, backward_time=0.045, optim_step_time=0.033, optim0_lr0=6.191e-04, train_time=0.555
[gpu11] 2022-06-08 04:29:15,702 (trainer:672) INFO: 18epoch:train:5701-6175batch: iter_time=1.443e-04, forward_time=0.049, loss_ctc=26.857, loss_transducer=11.407, loss=4.866, backward_time=0.045, optim_step_time=0.032, optim0_lr0=6.182e-04, train_time=0.549
[gpu11] 2022-06-08 04:30:20,957 (trainer:672) INFO: 18epoch:train:6176-6650batch: iter_time=1.473e-04, forward_time=0.049, loss_ctc=25.771, loss_transducer=10.863, loss=4.649, backward_time=0.045, optim_step_time=0.032, optim0_lr0=6.174e-04, train_time=0.549
[gpu11] 2022-06-08 04:31:26,521 (trainer:672) INFO: 18epoch:train:6651-7125batch: iter_time=1.603e-04, forward_time=0.049, loss_ctc=28.013, loss_transducer=11.985, loss=5.097, backward_time=0.045, optim_step_time=0.032, optim0_lr0=6.165e-04, train_time=0.552
[gpu11] 2022-06-08 04:32:32,303 (trainer:672) INFO: 18epoch:train:7126-7600batch: iter_time=1.571e-04, forward_time=0.049, loss_ctc=27.390, loss_transducer=11.816, loss=5.008, backward_time=0.045, optim_step_time=0.032, optim0_lr0=6.156e-04, train_time=0.554
[gpu11] 2022-06-08 04:33:37,693 (trainer:672) INFO: 18epoch:train:7601-8075batch: iter_time=1.577e-04, forward_time=0.049, loss_ctc=28.109, loss_transducer=12.177, loss=5.152, backward_time=0.045, optim_step_time=0.032, optim0_lr0=6.148e-04, train_time=0.551
[gpu11] 2022-06-08 04:34:42,501 (trainer:672) INFO: 18epoch:train:8076-8550batch: iter_time=1.550e-04, forward_time=0.048, loss_ctc=26.980, loss_transducer=11.594, loss=4.922, backward_time=0.044, optim_step_time=0.032, optim0_lr0=6.139e-04, train_time=0.546
[gpu11] 2022-06-08 04:35:47,609 (trainer:672) INFO: 18epoch:train:8551-9025batch: iter_time=1.572e-04, forward_time=0.048, loss_ctc=27.053, loss_transducer=11.525, loss=4.910, backward_time=0.045, optim_step_time=0.032, optim0_lr0=6.130e-04, train_time=0.548
[gpu11] 2022-06-08 04:36:53,385 (trainer:672) INFO: 18epoch:train:9026-9500batch: iter_time=1.655e-04, forward_time=0.049, loss_ctc=28.972, loss_transducer=12.595, loss=5.322, backward_time=0.045, optim_step_time=0.032, optim0_lr0=6.122e-04, train_time=0.553
[gpu11] 2022-06-08 04:37:08,731 (trainer:328) INFO: 18epoch results: [train] iter_time=1.868e-04, forward_time=0.049, loss_ctc=27.537, loss_transducer=11.660, loss=4.980, backward_time=0.045, optim_step_time=0.032, optim0_lr0=6.205e-04, train_time=0.552, time=21 minutes and 53.37 seconds, total_count=171054, gpu_max_cached_mem_GB=15.090, [valid] loss_ctc=27.917, cer_ctc=0.146, loss_transducer=11.771, cer_transducer=nan, wer_transducer=nan, loss=20.146, time=14.59 seconds, total_count=5058, gpu_max_cached_mem_GB=15.090
[gpu11] 2022-06-08 04:37:11,689 (trainer:376) INFO: The best model has been updated: valid.loss
[gpu11] 2022-06-08 04:37:11,718 (trainer:430) INFO: The model files were removed: exp/asr_IndEng188h-rnnt-600bpe/15epoch.pth
[gpu11] 2022-06-08 04:37:11,718 (trainer:262) INFO: 19/20epoch started. Estimated time to finish: 44 minutes and 34.46 seconds
[gpu11] 2022-06-08 04:38:18,376 (trainer:672) INFO: 19epoch:train:1-475batch: iter_time=7.583e-04, forward_time=0.049, loss_ctc=24.830, loss_transducer=10.052, loss=4.375, backward_time=0.045, optim_step_time=0.033, optim0_lr0=6.113e-04, train_time=0.562
[gpu11] 2022-06-08 04:39:23,357 (trainer:672) INFO: 19epoch:train:476-950batch: iter_time=1.737e-04, forward_time=0.048, loss_ctc=25.142, loss_transducer=10.401, loss=4.486, backward_time=0.045, optim_step_time=0.033, optim0_lr0=6.105e-04, train_time=0.547
[gpu11] 2022-06-08 04:40:28,047 (trainer:672) INFO: 19epoch:train:951-1425batch: iter_time=1.618e-04, forward_time=0.048, loss_ctc=25.337, loss_transducer=10.370, loss=4.493, backward_time=0.045, optim_step_time=0.033, optim0_lr0=6.097e-04, train_time=0.544
[gpu11] 2022-06-08 04:41:33,337 (trainer:672) INFO: 19epoch:train:1426-1900batch: iter_time=1.699e-04, forward_time=0.049, loss_ctc=26.214, loss_transducer=10.738, loss=4.651, backward_time=0.045, optim_step_time=0.033, optim0_lr0=6.088e-04, train_time=0.550
[gpu11] 2022-06-08 04:42:38,653 (trainer:672) INFO: 19epoch:train:1901-2375batch: iter_time=1.732e-04, forward_time=0.049, loss_ctc=27.760, loss_transducer=11.530, loss=4.965, backward_time=0.045, optim_step_time=0.033, optim0_lr0=6.080e-04, train_time=0.550
[gpu11] 2022-06-08 04:43:43,837 (trainer:672) INFO: 19epoch:train:2376-2850batch: iter_time=1.648e-04, forward_time=0.048, loss_ctc=27.628, loss_transducer=11.463, loss=4.938, backward_time=0.044, optim_step_time=0.033, optim0_lr0=6.071e-04, train_time=0.549
[gpu11] 2022-06-08 04:44:49,810 (trainer:672) INFO: 19epoch:train:2851-3325batch: iter_time=1.564e-04, forward_time=0.049, loss_ctc=26.534, loss_transducer=11.164, loss=4.781, backward_time=0.046, optim_step_time=0.033, optim0_lr0=6.063e-04, train_time=0.555
[gpu11] 2022-06-08 04:45:55,803 (trainer:672) INFO: 19epoch:train:3326-3800batch: iter_time=1.575e-04, forward_time=0.050, loss_ctc=26.421, loss_transducer=11.123, loss=4.762, backward_time=0.045, optim_step_time=0.033, optim0_lr0=6.055e-04, train_time=0.555
[gpu11] 2022-06-08 04:47:02,014 (trainer:672) INFO: 19epoch:train:3801-4275batch: iter_time=1.488e-04, forward_time=0.050, loss_ctc=26.342, loss_transducer=11.110, loss=4.753, backward_time=0.045, optim_step_time=0.033, optim0_lr0=6.047e-04, train_time=0.558
[gpu11] 2022-06-08 04:48:07,773 (trainer:672) INFO: 19epoch:train:4276-4750batch: iter_time=1.551e-04, forward_time=0.049, loss_ctc=26.189, loss_transducer=10.991, loss=4.712, backward_time=0.045, optim_step_time=0.033, optim0_lr0=6.039e-04, train_time=0.553
[gpu11] 2022-06-08 04:49:13,143 (trainer:672) INFO: 19epoch:train:4751-5225batch: iter_time=1.694e-04, forward_time=0.049, loss_ctc=25.811, loss_transducer=10.779, loss=4.631, backward_time=0.045, optim_step_time=0.032, optim0_lr0=6.030e-04, train_time=0.550
[gpu11] 2022-06-08 04:50:18,128 (trainer:672) INFO: 19epoch:train:5226-5700batch: iter_time=1.693e-04, forward_time=0.049, loss_ctc=26.780, loss_transducer=11.387, loss=4.855, backward_time=0.044, optim_step_time=0.032, optim0_lr0=6.022e-04, train_time=0.547
[gpu11] 2022-06-08 04:51:23,462 (trainer:672) INFO: 19epoch:train:5701-6175batch: iter_time=1.721e-04, forward_time=0.049, loss_ctc=26.995, loss_transducer=11.238, loss=4.834, backward_time=0.045, optim_step_time=0.032, optim0_lr0=6.014e-04, train_time=0.550
[gpu11] 2022-06-08 04:52:29,361 (trainer:672) INFO: 19epoch:train:6176-6650batch: iter_time=1.712e-04, forward_time=0.049, loss_ctc=27.844, loss_transducer=11.771, loss=5.031, backward_time=0.045, optim_step_time=0.032, optim0_lr0=6.006e-04, train_time=0.556
[gpu11] 2022-06-08 04:53:34,539 (trainer:672) INFO: 19epoch:train:6651-7125batch: iter_time=1.681e-04, forward_time=0.049, loss_ctc=26.934, loss_transducer=11.339, loss=4.855, backward_time=0.045, optim_step_time=0.032, optim0_lr0=5.998e-04, train_time=0.548
[gpu11] 2022-06-08 04:54:40,323 (trainer:672) INFO: 19epoch:train:7126-7600batch: iter_time=1.716e-04, forward_time=0.049, loss_ctc=27.277, loss_transducer=11.494, loss=4.919, backward_time=0.045, optim_step_time=0.032, optim0_lr0=5.990e-04, train_time=0.553
[gpu11] 2022-06-08 04:55:45,836 (trainer:672) INFO: 19epoch:train:7601-8075batch: iter_time=1.721e-04, forward_time=0.049, loss_ctc=26.163, loss_transducer=11.109, loss=4.739, backward_time=0.045, optim_step_time=0.032, optim0_lr0=5.982e-04, train_time=0.552
[gpu11] 2022-06-08 04:56:51,663 (trainer:672) INFO: 19epoch:train:8076-8550batch: iter_time=1.621e-04, forward_time=0.049, loss_ctc=26.327, loss_transducer=11.294, loss=4.798, backward_time=0.044, optim_step_time=0.032, optim0_lr0=5.974e-04, train_time=0.554
[gpu11] 2022-06-08 04:57:57,146 (trainer:672) INFO: 19epoch:train:8551-9025batch: iter_time=1.663e-04, forward_time=0.049, loss_ctc=26.390, loss_transducer=11.254, loss=4.793, backward_time=0.045, optim_step_time=0.032, optim0_lr0=5.966e-04, train_time=0.551
[gpu11] 2022-06-08 04:59:02,610 (trainer:672) INFO: 19epoch:train:9026-9500batch: iter_time=1.696e-04, forward_time=0.048, loss_ctc=26.317, loss_transducer=11.194, loss=4.772, backward_time=0.045, optim_step_time=0.032, optim0_lr0=5.958e-04, train_time=0.551
[gpu11] 2022-06-08 04:59:17,944 (trainer:328) INFO: 19epoch results: [train] iter_time=1.955e-04, forward_time=0.049, loss_ctc=26.459, loss_transducer=11.090, loss=4.757, backward_time=0.045, optim_step_time=0.032, optim0_lr0=6.035e-04, train_time=0.552, time=21 minutes and 51.67 seconds, total_count=180557, gpu_max_cached_mem_GB=15.090, [valid] loss_ctc=27.392, cer_ctc=0.145, loss_transducer=11.676, cer_transducer=nan, wer_transducer=nan, loss=19.893, time=14.55 seconds, total_count=5339, gpu_max_cached_mem_GB=15.090
[gpu11] 2022-06-08 04:59:20,881 (trainer:376) INFO: The best model has been updated: valid.loss
[gpu11] 2022-06-08 04:59:20,909 (trainer:430) INFO: The model files were removed: exp/asr_IndEng188h-rnnt-600bpe/16epoch.pth
[gpu11] 2022-06-08 04:59:20,909 (trainer:262) INFO: 20/20epoch started. Estimated time to finish: 22 minutes and 16.81 seconds
[gpu11] 2022-06-08 05:00:26,300 (trainer:672) INFO: 20epoch:train:1-475batch: iter_time=8.713e-04, forward_time=0.049, loss_ctc=23.814, loss_transducer=9.752, loss=4.224, backward_time=0.044, optim_step_time=0.032, optim0_lr0=5.951e-04, train_time=0.551
[gpu11] 2022-06-08 05:01:31,766 (trainer:672) INFO: 20epoch:train:476-950batch: iter_time=1.570e-04, forward_time=0.049, loss_ctc=25.058, loss_transducer=10.127, loss=4.411, backward_time=0.046, optim_step_time=0.032, optim0_lr0=5.943e-04, train_time=0.551
[gpu11] 2022-06-08 05:02:37,745 (trainer:672) INFO: 20epoch:train:951-1425batch: iter_time=1.626e-04, forward_time=0.049, loss_ctc=24.916, loss_transducer=10.256, loss=4.433, backward_time=0.045, optim_step_time=0.032, optim0_lr0=5.935e-04, train_time=0.555
[gpu11] 2022-06-08 05:03:43,931 (trainer:672) INFO: 20epoch:train:1426-1900batch: iter_time=1.596e-04, forward_time=0.049, loss_ctc=25.674, loss_transducer=10.567, loss=4.567, backward_time=0.045, optim_step_time=0.032, optim0_lr0=5.927e-04, train_time=0.557
[gpu11] 2022-06-08 05:04:49,058 (trainer:672) INFO: 20epoch:train:1901-2375batch: iter_time=1.459e-04, forward_time=0.049, loss_ctc=25.312, loss_transducer=10.432, loss=4.506, backward_time=0.045, optim_step_time=0.032, optim0_lr0=5.920e-04, train_time=0.549
[gpu11] 2022-06-08 05:05:55,177 (trainer:672) INFO: 20epoch:train:2376-2850batch: iter_time=1.491e-04, forward_time=0.049, loss_ctc=25.773, loss_transducer=10.559, loss=4.573, backward_time=0.045, optim_step_time=0.033, optim0_lr0=5.912e-04, train_time=0.556
[gpu11] 2022-06-08 05:07:01,502 (trainer:672) INFO: 20epoch:train:2851-3325batch: iter_time=1.400e-04, forward_time=0.050, loss_ctc=25.737, loss_transducer=10.647, loss=4.592, backward_time=0.046, optim_step_time=0.033, optim0_lr0=5.904e-04, train_time=0.559
[gpu11] 2022-06-08 05:08:07,522 (trainer:672) INFO: 20epoch:train:3326-3800batch: iter_time=1.552e-04, forward_time=0.050, loss_ctc=25.640, loss_transducer=10.666, loss=4.590, backward_time=0.046, optim_step_time=0.033, optim0_lr0=5.897e-04, train_time=0.555
[gpu11] 2022-06-08 05:09:12,813 (trainer:672) INFO: 20epoch:train:3801-4275batch: iter_time=1.464e-04, forward_time=0.049, loss_ctc=25.353, loss_transducer=10.342, loss=4.487, backward_time=0.046, optim_step_time=0.033, optim0_lr0=5.889e-04, train_time=0.550
[gpu11] 2022-06-08 05:10:19,234 (trainer:672) INFO: 20epoch:train:4276-4750batch: iter_time=1.479e-04, forward_time=0.050, loss_ctc=24.525, loss_transducer=10.275, loss=4.408, backward_time=0.046, optim_step_time=0.033, optim0_lr0=5.881e-04, train_time=0.559
[gpu11] 2022-06-08 05:11:25,522 (trainer:672) INFO: 20epoch:train:4751-5225batch: iter_time=1.500e-04, forward_time=0.050, loss_ctc=26.437, loss_transducer=11.246, loss=4.794, backward_time=0.046, optim_step_time=0.033, optim0_lr0=5.874e-04, train_time=0.558
[gpu11] 2022-06-08 05:12:31,010 (trainer:672) INFO: 20epoch:train:5226-5700batch: iter_time=1.447e-04, forward_time=0.049, loss_ctc=25.675, loss_transducer=10.659, loss=4.590, backward_time=0.046, optim_step_time=0.033, optim0_lr0=5.866e-04, train_time=0.551
[gpu11] 2022-06-08 05:13:36,957 (trainer:672) INFO: 20epoch:train:5701-6175batch: iter_time=1.472e-04, forward_time=0.050, loss_ctc=24.822, loss_transducer=10.343, loss=4.447, backward_time=0.046, optim_step_time=0.033, optim0_lr0=5.859e-04, train_time=0.555
[gpu11] 2022-06-08 05:14:42,488 (trainer:672) INFO: 20epoch:train:6176-6650batch: iter_time=1.513e-04, forward_time=0.049, loss_ctc=26.071, loss_transducer=10.874, loss=4.674, backward_time=0.046, optim_step_time=0.033, optim0_lr0=5.851e-04, train_time=0.552
[gpu11] 2022-06-08 05:15:48,559 (trainer:672) INFO: 20epoch:train:6651-7125batch: iter_time=1.502e-04, forward_time=0.050, loss_ctc=25.333, loss_transducer=10.568, loss=4.542, backward_time=0.046, optim_step_time=0.033, optim0_lr0=5.844e-04, train_time=0.556
[gpu11] 2022-06-08 05:16:54,327 (trainer:672) INFO: 20epoch:train:7126-7600batch: iter_time=1.531e-04, forward_time=0.049, loss_ctc=24.789, loss_transducer=10.544, loss=4.495, backward_time=0.045, optim_step_time=0.033, optim0_lr0=5.837e-04, train_time=0.553
[gpu11] 2022-06-08 05:17:59,696 (trainer:672) INFO: 20epoch:train:7601-8075batch: iter_time=1.651e-04, forward_time=0.049, loss_ctc=24.677, loss_transducer=10.422, loss=4.456, backward_time=0.045, optim_step_time=0.032, optim0_lr0=5.829e-04, train_time=0.550
[gpu11] 2022-06-08 05:19:05,516 (trainer:672) INFO: 20epoch:train:8076-8550batch: iter_time=1.599e-04, forward_time=0.049, loss_ctc=25.900, loss_transducer=10.868, loss=4.660, backward_time=0.045, optim_step_time=0.032, optim0_lr0=5.822e-04, train_time=0.554
[gpu11] 2022-06-08 05:20:10,303 (trainer:672) INFO: 20epoch:train:8551-9025batch: iter_time=1.660e-04, forward_time=0.049, loss_ctc=25.144, loss_transducer=10.637, loss=4.545, backward_time=0.045, optim_step_time=0.032, optim0_lr0=5.815e-04, train_time=0.545
[gpu11] 2022-06-08 05:21:16,422 (trainer:672) INFO: 20epoch:train:9026-9500batch: iter_time=1.595e-04, forward_time=0.049, loss_ctc=25.798, loss_transducer=10.882, loss=4.655, backward_time=0.045, optim_step_time=0.032, optim0_lr0=5.807e-04, train_time=0.556
[gpu11] 2022-06-08 05:21:31,754 (trainer:328) INFO: 20epoch results: [train] iter_time=1.891e-04, forward_time=0.049, loss_ctc=25.318, loss_transducer=10.531, loss=4.532, backward_time=0.045, optim_step_time=0.033, optim0_lr0=5.878e-04, train_time=0.554, time=21 minutes and 56.22 seconds, total_count=190060, gpu_max_cached_mem_GB=15.090, [valid] loss_ctc=27.262, cer_ctc=0.140, loss_transducer=11.630, cer_transducer=nan, wer_transducer=nan, loss=19.808, time=14.63 seconds, total_count=5620, gpu_max_cached_mem_GB=15.090
[gpu11] 2022-06-08 05:21:34,648 (trainer:376) INFO: The best model has been updated: valid.loss
[gpu11] 2022-06-08 05:21:34,676 (trainer:430) INFO: The model files were removed: exp/asr_IndEng188h-rnnt-600bpe/17epoch.pth
[gpu11] 2022-06-08 05:21:34,676 (trainer:448) INFO: The training was finished at 20 epochs
[gpu11] 2022-06-08 05:21:34,705 (average_nbest_models:69) INFO: Averaging 3best models: criterion="valid.loss": exp/asr_IndEng188h-rnnt-600bpe/valid.loss.ave_3best.pth
# Accounting: time=26745 threads=1
# Ended (code 0) at Wed Jun 8 05:21:36 IST 2022, elapsed time 26745 seconds