diff --git a/README.md b/README.md new file mode 100644 index 0000000000000000000000000000000000000000..2e9b14f43b35766402c6c1edc8c01859ee3b0050 --- /dev/null +++ b/README.md @@ -0,0 +1,202 @@ +--- +base_model: Willow123/LVP_R560_IHD24_S3_1024_N24_CAT +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. 
+ +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.8.2 diff --git a/adapter_config.json b/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..e91f195aa27438a0fc7278f298e0d8dbcaef2eda --- 
/dev/null +++ b/adapter_config.json @@ -0,0 +1,32 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "Willow123/LVP_R560_IHD24_S3_1024_N24_CAT", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 128, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": [ + "video_mem_proj" + ], + "peft_type": "LORA", + "r": 128, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "feed_forward.w2", + "attention.wo", + "feed_forward.w3", + "feed_forward.w1", + "attention.wqkv" + ], + "task_type": "CAUSAL_LM", + "use_rslora": false +} diff --git a/adapter_model.bin b/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..64d3599083dc5464b098d20b4daa26cf3ee66767 --- /dev/null +++ b/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bc9d0e035f664fbf923ef2c5f1b792d06e9d354aaac940764d6344f206275985 +size 650245533 diff --git a/added_tokens.json b/added_tokens.json new file mode 100644 index 0000000000000000000000000000000000000000..39090265148ac44e5d5ce46e69594ac3ad34b1e5 --- /dev/null +++ b/added_tokens.json @@ -0,0 +1,8 @@ +{ + "<|action_end|>": 92547, + "<|action_start|>": 92546, + "<|im_end|>": 92545, + "<|im_start|>": 92544, + "<|interpreter|>": 92548, + "<|plugin|>": 92549 +} diff --git a/latest b/latest new file mode 100644 index 0000000000000000000000000000000000000000..75ae820a9e47ad9c01975fb0f9a1fead180a2721 --- /dev/null +++ b/latest @@ -0,0 +1 @@ +global_step5205 \ No newline at end of file diff --git a/rng_state_0.pth b/rng_state_0.pth new file mode 100644 index 0000000000000000000000000000000000000000..e378fd2d0ab32bd4f07d29f243a6e509b0792d77 --- /dev/null +++ b/rng_state_0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:2f32f3deebf95eae840de33c723b45d006a96b29bc35e9b934212bd156d2b588 +size 21687 diff --git a/rng_state_1.pth b/rng_state_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..78d8e34720df7d704d8048d2ccc6ad123fa8cb71 --- /dev/null +++ b/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:096d3a423466f936fac9485eb4afd048be9c6e9dd001e7aefee72ad97960be60 +size 21687 diff --git a/rng_state_10.pth b/rng_state_10.pth new file mode 100644 index 0000000000000000000000000000000000000000..54b446f1c17e13df672e935ffd744973d8f59ad7 --- /dev/null +++ b/rng_state_10.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f8f578cef3c3be5fc4a46d84373ef3cbd8841afe911cfa12a6c88b1c2ddbd1c9 +size 21698 diff --git a/rng_state_11.pth b/rng_state_11.pth new file mode 100644 index 0000000000000000000000000000000000000000..a6912eb48d8c1276d01544bf0db24814a5873553 --- /dev/null +++ b/rng_state_11.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aefb94f2f768a405d771252d008f196da11b3511a95a99677a7f2c37d6fbc062 +size 21698 diff --git a/rng_state_12.pth b/rng_state_12.pth new file mode 100644 index 0000000000000000000000000000000000000000..15ebdedde014c9dbc036b7bec529f8fc77b53cab --- /dev/null +++ b/rng_state_12.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:84b855348be7d42d8c194c7e94c51c6c35845ae1b9cbb471a4cf4f4a0bf95f44 +size 21698 diff --git a/rng_state_13.pth b/rng_state_13.pth new file mode 100644 index 0000000000000000000000000000000000000000..4d4514f965e81d5c543f0ca72394203fcb1c194e --- /dev/null +++ b/rng_state_13.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b45bec30d67f590021d868dcac4d053d411ba351b02feeb868d60633ba8b0a3a +size 21698 diff --git a/rng_state_14.pth b/rng_state_14.pth new file mode 100644 index 0000000000000000000000000000000000000000..2ad79a4462699c247b66d78c756b6da62b686544 --- /dev/null +++ 
b/rng_state_14.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ffda5563f489cd115a603bb92d58313efdbed991d5d4ae30937daf2637311929 +size 21698 diff --git a/rng_state_15.pth b/rng_state_15.pth new file mode 100644 index 0000000000000000000000000000000000000000..40767c8f8fee7320f98ca422f60a84ec8b13fd65 --- /dev/null +++ b/rng_state_15.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e4a3b3e76d3dfcf2e0a4b85364ac34e592f64341f51f2b5732f98d8258503683 +size 21698 diff --git a/rng_state_16.pth b/rng_state_16.pth new file mode 100644 index 0000000000000000000000000000000000000000..5dcada17d6b8169a8cc29ab588893d65e96f9333 --- /dev/null +++ b/rng_state_16.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b78460734829b0a4e9c14f9140bf6955a6b983f4d07a4ab1a185ac70eb4ee5d7 +size 21698 diff --git a/rng_state_17.pth b/rng_state_17.pth new file mode 100644 index 0000000000000000000000000000000000000000..aed3c79d15626235fb172ccaa030ee1da54b68ac --- /dev/null +++ b/rng_state_17.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d71b49deebaf8677b488c61536aafbed5ab4da647cfd264c098742a0679c211f +size 21698 diff --git a/rng_state_18.pth b/rng_state_18.pth new file mode 100644 index 0000000000000000000000000000000000000000..45af1a8b7ecbe12121b39f1b303d3b73d47fbdb1 --- /dev/null +++ b/rng_state_18.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:90b4a2bbf369561611d9c79fca2cdae1df654bca465a3ed8778c4718fe936be0 +size 21698 diff --git a/rng_state_19.pth b/rng_state_19.pth new file mode 100644 index 0000000000000000000000000000000000000000..2228579cc792afacae97e226dcd37d2d0b4db8fd --- /dev/null +++ b/rng_state_19.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f261d09d3cbd26d0c734dd38a47b1f0834d18743a1472800f9d3e7155a494698 +size 21698 diff --git a/rng_state_2.pth b/rng_state_2.pth new file mode 100644 index 
0000000000000000000000000000000000000000..f08ec73f4d4384f93e4879b68776ad49368741fe --- /dev/null +++ b/rng_state_2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d0809f61282e55bc316e0f0b6de6b9610219668913abd22600a57b663b2a9fb1 +size 21687 diff --git a/rng_state_20.pth b/rng_state_20.pth new file mode 100644 index 0000000000000000000000000000000000000000..f3eed69fc5eb6dd6693478566ea89c79a693265f --- /dev/null +++ b/rng_state_20.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ee0d3a61cc607fbd8a5d3aeee2abd2233e8638c6fb0c19926946341f834e5b57 +size 21698 diff --git a/rng_state_21.pth b/rng_state_21.pth new file mode 100644 index 0000000000000000000000000000000000000000..e0509cee1a131a065aac0a6e3cca954c777d3858 --- /dev/null +++ b/rng_state_21.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6fc106ee653e4f285b14d0c908b971af3bf6cf4d2e4a9c38fc046f0a9a1dbf60 +size 21698 diff --git a/rng_state_22.pth b/rng_state_22.pth new file mode 100644 index 0000000000000000000000000000000000000000..c78533d360af010fa0abf0c10281b22c52b48251 --- /dev/null +++ b/rng_state_22.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:75f7f1a576417e93bd438710fbe49b9b6c29acd1dd83f68fa69041e406e33295 +size 21698 diff --git a/rng_state_23.pth b/rng_state_23.pth new file mode 100644 index 0000000000000000000000000000000000000000..a95b1522cfee5faf513d8c52690ccd90f40761d8 --- /dev/null +++ b/rng_state_23.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d5bede2c9a0dedc8dbae33c7408f5902bf6ca26af91f3c37badc01393765c20b +size 21698 diff --git a/rng_state_24.pth b/rng_state_24.pth new file mode 100644 index 0000000000000000000000000000000000000000..212e001ac5d3272da2e8252de5d7adeee0d60ddc --- /dev/null +++ b/rng_state_24.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7ed7167b9b5af50cc9151f7e68e780fc4b61f2f73eeb3c0bb8b675c48250f42e +size 21698 diff 
--git a/rng_state_25.pth b/rng_state_25.pth new file mode 100644 index 0000000000000000000000000000000000000000..c606cafefcf457951487182e7db310541b009068 --- /dev/null +++ b/rng_state_25.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8a8bcefe50fa56e84e00332f42333de7ac29ca9c85053b70152541f132280629 +size 21698 diff --git a/rng_state_26.pth b/rng_state_26.pth new file mode 100644 index 0000000000000000000000000000000000000000..47fcfafac5054dd9e7297429441dfc42cf536b16 --- /dev/null +++ b/rng_state_26.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b9878267b7d028fec9517d4933e946095a87a0c2926fc1e71036ff4971f87c30 +size 21698 diff --git a/rng_state_27.pth b/rng_state_27.pth new file mode 100644 index 0000000000000000000000000000000000000000..e568bfd4da0fa0be99df97d5f4d41e06d5da0f4f --- /dev/null +++ b/rng_state_27.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fab74b9342d718bc575d40e99258a2b3bb93e47546d1e7d20584a0c5a5c01e98 +size 21698 diff --git a/rng_state_28.pth b/rng_state_28.pth new file mode 100644 index 0000000000000000000000000000000000000000..b11e8f7553642203dfe197d90cbb5f81292ad92c --- /dev/null +++ b/rng_state_28.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2f9265f6bca1c7985ce8f4b6fed5e61d285044ee960d5e0fa14c794ba6753a21 +size 21698 diff --git a/rng_state_29.pth b/rng_state_29.pth new file mode 100644 index 0000000000000000000000000000000000000000..32538dcc9982aca847a4de773af9015aa172064f --- /dev/null +++ b/rng_state_29.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f046cd5d4e2573eeb1ab2d78a539f5de5db3b665c459b19adba8a4db314dc992 +size 21698 diff --git a/rng_state_3.pth b/rng_state_3.pth new file mode 100644 index 0000000000000000000000000000000000000000..e4d71ac68f4655708cb43a6324269f5c4a10e57f --- /dev/null +++ b/rng_state_3.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:8a268e995c7a8abe9c05675b6dad6ef3f7b675c7490d238c343d182fec337c3a +size 21687 diff --git a/rng_state_30.pth b/rng_state_30.pth new file mode 100644 index 0000000000000000000000000000000000000000..f39510c4bfedd3159cde571b855e02f76ada5976 --- /dev/null +++ b/rng_state_30.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d0fa9ee6eede2f7c682f2de10cdaf83be9593eb8d101c8246d78db6d0a73c718 +size 21698 diff --git a/rng_state_31.pth b/rng_state_31.pth new file mode 100644 index 0000000000000000000000000000000000000000..2d968f594cf6b6201c96645a7406550fb1c7eaff --- /dev/null +++ b/rng_state_31.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:903c3e981821ca9263d60633e520d37fcdcf2ab3a4d32a45355bd4ea5989849b +size 21698 diff --git a/rng_state_32.pth b/rng_state_32.pth new file mode 100644 index 0000000000000000000000000000000000000000..89bf16999aa9a49d7a33af00092611350dec118e --- /dev/null +++ b/rng_state_32.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d211c885b66eb9bd4605948e4bdf25e8a81567f91142e33728437ce83e1b566e +size 21698 diff --git a/rng_state_33.pth b/rng_state_33.pth new file mode 100644 index 0000000000000000000000000000000000000000..f56fe888166ead8c83238354e3b7f5d9039e462e --- /dev/null +++ b/rng_state_33.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23eeb55dcacdeb4724b3ad17396c314c0744f7de9053fe96f59e57f295b4fe8e +size 21698 diff --git a/rng_state_34.pth b/rng_state_34.pth new file mode 100644 index 0000000000000000000000000000000000000000..be92bf71a41659b4d0cb9f05e18620de4445ddd2 --- /dev/null +++ b/rng_state_34.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:49dd174caa80771bb0ff737eb515840180862605a2d0bc367e1cf38cbad71d59 +size 21698 diff --git a/rng_state_35.pth b/rng_state_35.pth new file mode 100644 index 0000000000000000000000000000000000000000..b482b0ecfd5eb25992e12d381f54ca18d7b10e12 --- /dev/null +++ 
b/rng_state_35.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c2e9715a7cb5a3f3cb52b3df1568314848fdd8cbcad741f2152aa183a6e2e9dc +size 21698 diff --git a/rng_state_36.pth b/rng_state_36.pth new file mode 100644 index 0000000000000000000000000000000000000000..7b7e423bded1c2a23aec9b2a1b7aed60cf771e3e --- /dev/null +++ b/rng_state_36.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3a5ed4ee19cb2614099a5ba801c5ed72b14575cf1f2e25f9ffbe43a1386f7858 +size 21698 diff --git a/rng_state_37.pth b/rng_state_37.pth new file mode 100644 index 0000000000000000000000000000000000000000..4ac03137fcfeecf4e81c7de31eaba59f4abce685 --- /dev/null +++ b/rng_state_37.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:651b4aac19c2d626983bba18237f747f728000646133e6b0b049f243cb3132ea +size 21698 diff --git a/rng_state_38.pth b/rng_state_38.pth new file mode 100644 index 0000000000000000000000000000000000000000..b35869b1d78d4b61b03afa3e0df3c46123e9ed88 --- /dev/null +++ b/rng_state_38.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:741c12b7082a5202243578c85ca643308320066210cd9236c8095b61fc80f37e +size 21698 diff --git a/rng_state_39.pth b/rng_state_39.pth new file mode 100644 index 0000000000000000000000000000000000000000..a3d1832a876a9ca249266f7cdadb5a8353c186b2 --- /dev/null +++ b/rng_state_39.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:973a9bd5d699c4e0b70fdf3fe694e61bf5485e0485469b1b9689f648f7e55ec5 +size 21698 diff --git a/rng_state_4.pth b/rng_state_4.pth new file mode 100644 index 0000000000000000000000000000000000000000..fd9f469b6044224d2531d71eabed48806aa765d2 --- /dev/null +++ b/rng_state_4.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f70ea0519f22c148c6af06722c576a6b2a4d63ad7adb9ca5367629a4706d6e5b +size 21687 diff --git a/rng_state_40.pth b/rng_state_40.pth new file mode 100644 index 
0000000000000000000000000000000000000000..4984d1054fd90f205eeb7a19575a0e6d49e5bb3b --- /dev/null +++ b/rng_state_40.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:793a48ef299ca8b93a74f269a7bdc1d03857061baa7f3a63076ee937963b314d +size 21698 diff --git a/rng_state_41.pth b/rng_state_41.pth new file mode 100644 index 0000000000000000000000000000000000000000..6c9e5b6dcfa080f43700b83323ce3da155d700b1 --- /dev/null +++ b/rng_state_41.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9cac3e5a480e7456e6b15be70c10c1e72762052ac4a4230fc6b4879dea35044e +size 21698 diff --git a/rng_state_42.pth b/rng_state_42.pth new file mode 100644 index 0000000000000000000000000000000000000000..1ec5cc50ac3d7f02c551e9e64d789830bd311e89 --- /dev/null +++ b/rng_state_42.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55f6179d1d35d75865cfae7e2f728992dd5e1bf7c8744f33532bf51885d4fb5e +size 21698 diff --git a/rng_state_43.pth b/rng_state_43.pth new file mode 100644 index 0000000000000000000000000000000000000000..a76d7925a715279b8059a983466152cf678deac6 --- /dev/null +++ b/rng_state_43.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f055581ee89ef7a90f56f33f47239ea59aa63446af01f45b292a22b314b4fc55 +size 21698 diff --git a/rng_state_44.pth b/rng_state_44.pth new file mode 100644 index 0000000000000000000000000000000000000000..d2781715dd6e2a56fbaa1e8c23d318ab39b9284f --- /dev/null +++ b/rng_state_44.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:145ba85aeb47b401a0a9350c847991e69fcd1a5d126e590e4b35ce539e89296e +size 21698 diff --git a/rng_state_45.pth b/rng_state_45.pth new file mode 100644 index 0000000000000000000000000000000000000000..a1fbeb8ed1898563e57f7edfa610242f684cf26b --- /dev/null +++ b/rng_state_45.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:998194852a2d6f4c16337accc920327a300253b56dccead0ca0317d3fef7ff62 +size 21698 diff 
--git a/rng_state_46.pth b/rng_state_46.pth new file mode 100644 index 0000000000000000000000000000000000000000..a0b01c7562c937bdb56b4379bac1ca37bcada33e --- /dev/null +++ b/rng_state_46.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0a26ffd0852413a7ab601ec75ae7e95cbb44b28bdde1dfcdce660908a29977f2 +size 21698 diff --git a/rng_state_47.pth b/rng_state_47.pth new file mode 100644 index 0000000000000000000000000000000000000000..e4d896af65a7687eb1a06b7887b82addfd1f6005 --- /dev/null +++ b/rng_state_47.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4fd4289f273fb9119a44d8b71406282927c1c91f514c930be1c0dd700ccc3244 +size 21698 diff --git a/rng_state_48.pth b/rng_state_48.pth new file mode 100644 index 0000000000000000000000000000000000000000..a0675330b9ee4942c1d510240817300b17af3445 --- /dev/null +++ b/rng_state_48.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:508969502676847268b1d897ca0143061ddbfb191ab89db355d15f1abf9a1648 +size 21698 diff --git a/rng_state_49.pth b/rng_state_49.pth new file mode 100644 index 0000000000000000000000000000000000000000..641409e9b438cb2b33972cd7dadbd71f2b6a59d1 --- /dev/null +++ b/rng_state_49.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9789afc9eb30d45dc49a69bd432888ac6836ef5668db68c014d6ba71c890d262 +size 21698 diff --git a/rng_state_5.pth b/rng_state_5.pth new file mode 100644 index 0000000000000000000000000000000000000000..5b87cde630dd8c3b9020219871b2174f778754fa --- /dev/null +++ b/rng_state_5.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ae151f2bd3e2589f8d312e51e6e8d556dc9296a0d7f26fafd2260bd1c97fb0f4 +size 21687 diff --git a/rng_state_50.pth b/rng_state_50.pth new file mode 100644 index 0000000000000000000000000000000000000000..ac9ec962c911319c4a4b42039ddaba64f6588d0d --- /dev/null +++ b/rng_state_50.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:20500264cbd8c7e000d466fbeea63318f966b96c49a6b5d7091998e092a1a188 +size 21698 diff --git a/rng_state_51.pth b/rng_state_51.pth new file mode 100644 index 0000000000000000000000000000000000000000..c302eefbe0ad4d2cf94dc7ab347a453f00896edf --- /dev/null +++ b/rng_state_51.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:025f7058c3ff8d344a7dd701a5c7d68a8d01e203610fc1aa924543e3459c7bab +size 21698 diff --git a/rng_state_52.pth b/rng_state_52.pth new file mode 100644 index 0000000000000000000000000000000000000000..7d7e9057b7b6bff329938308dc17524f9acfc3a9 --- /dev/null +++ b/rng_state_52.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ab3b5f0e732a8a2626324b4e46038a572225644484e4d08efcefcb3e4e7c087d +size 21698 diff --git a/rng_state_53.pth b/rng_state_53.pth new file mode 100644 index 0000000000000000000000000000000000000000..d3f397d9d38e32c752cc902e430e2c4c209237a4 --- /dev/null +++ b/rng_state_53.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f83f361db9415c9f8267e77e6cc59eb37d6df293506c66718bc6639732256db +size 21698 diff --git a/rng_state_54.pth b/rng_state_54.pth new file mode 100644 index 0000000000000000000000000000000000000000..196bf1b4635ffc48a0cbaf2088702b5cd01c5884 --- /dev/null +++ b/rng_state_54.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f098966c11444eb5b876b7084288c7ee7d7c406347ccf8db3034e7dc020e3e79 +size 21698 diff --git a/rng_state_55.pth b/rng_state_55.pth new file mode 100644 index 0000000000000000000000000000000000000000..2ce0540c7713b07a34a63d47d63916cf1f1b60a5 --- /dev/null +++ b/rng_state_55.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d2eceb4f6c460ddbd79594ed9c7832a0d98eb1086d361f4c507cc12cfc6e6217 +size 21698 diff --git a/rng_state_56.pth b/rng_state_56.pth new file mode 100644 index 0000000000000000000000000000000000000000..59375909df70c2aaba5459ca78299561d323b85c --- /dev/null +++ 
b/rng_state_56.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23c96246a0c4ec84499aa01ae6fa22fe102f8a8205731c6b41cd280ce64beebc +size 21698 diff --git a/rng_state_57.pth b/rng_state_57.pth new file mode 100644 index 0000000000000000000000000000000000000000..0736f36c6a4f835a1a41818b7c148767d30ef404 --- /dev/null +++ b/rng_state_57.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5206d653cbd5d2c0978c032751ebc55091d57411bccda8bae4663c7100f0b764 +size 21698 diff --git a/rng_state_58.pth b/rng_state_58.pth new file mode 100644 index 0000000000000000000000000000000000000000..0d3612d731e97c6761e3527f5b056722fd27bfc0 --- /dev/null +++ b/rng_state_58.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:80e7f95d0a7e48a8d031627ef14af9fe31f8e3329a86a4685c5311e15e7e3802 +size 21698 diff --git a/rng_state_59.pth b/rng_state_59.pth new file mode 100644 index 0000000000000000000000000000000000000000..cd55432a02996c3d5ab4e5dfbd688e53ef5c07c2 --- /dev/null +++ b/rng_state_59.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:067b8bc48403d9acb50e621317f598f511630da2c26c27ac041bcf5b591f9ffe +size 21698 diff --git a/rng_state_6.pth b/rng_state_6.pth new file mode 100644 index 0000000000000000000000000000000000000000..22bd1dc6060a5a7ac736ad09cc1e4df16d887c06 --- /dev/null +++ b/rng_state_6.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a89e153800f32e9d9adeccfada4c4f3744e39e963d1db7a5c65cc48339c43178 +size 21687 diff --git a/rng_state_60.pth b/rng_state_60.pth new file mode 100644 index 0000000000000000000000000000000000000000..47a9f0002dfbfd02d4505e20a69a5c692ae106a2 --- /dev/null +++ b/rng_state_60.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cf7d77313b8adcc5ccf16ea2833cb077a457659633c6711cadfe6805cdb79a14 +size 21698 diff --git a/rng_state_61.pth b/rng_state_61.pth new file mode 100644 index 
0000000000000000000000000000000000000000..fb17a5640d92a9b924c3110615a5569ef411063e --- /dev/null +++ b/rng_state_61.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d3d9d6b78a847ebe3128bd9500abd0d1f5ff960c6ec756d1d2eb8eb28bad86b4 +size 21698 diff --git a/rng_state_62.pth b/rng_state_62.pth new file mode 100644 index 0000000000000000000000000000000000000000..086502cbdc62e4792cc4c3f0ed2c1eb251ecbf6a --- /dev/null +++ b/rng_state_62.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1f1d1b569762fea936830a772812fe5b37c9c58ef631874036e538a4c54fcd0e +size 21698 diff --git a/rng_state_63.pth b/rng_state_63.pth new file mode 100644 index 0000000000000000000000000000000000000000..ff90a438e11877801054dd9da54a78210e02b264 --- /dev/null +++ b/rng_state_63.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b55a78aa8bfe3e80d9717fad32fed6924e2c1aa0971f3fefe2816736f0b84923 +size 21698 diff --git a/rng_state_7.pth b/rng_state_7.pth new file mode 100644 index 0000000000000000000000000000000000000000..ac9b2a0a7e31779db9533edf4ae345bdb576b767 --- /dev/null +++ b/rng_state_7.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c5317018b52d3086e703ddfe73befe6535978fc55374b88003ed8d9ee74d8627 +size 21687 diff --git a/rng_state_8.pth b/rng_state_8.pth new file mode 100644 index 0000000000000000000000000000000000000000..95369cdd9b00f7c08d1a62be65b74e5319680ddf --- /dev/null +++ b/rng_state_8.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4b81d0252f0dbb99a1948b4b88b282e9b34b2f6f94d98d818709e19a535d697f +size 21687 diff --git a/rng_state_9.pth b/rng_state_9.pth new file mode 100644 index 0000000000000000000000000000000000000000..264b50be663d9d5ff2a5276386fdcabda1a1dd85 --- /dev/null +++ b/rng_state_9.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e2cb9fb5ceac79acf0e1041ec03a0e281ae5eb3328f501370ea68e7b27c7fc5b +size 21687 diff --git 
a/special_tokens_map.json b/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..1023d3510a8f6a72f071f934b9319d2b3dba616e --- /dev/null +++ b/special_tokens_map.json @@ -0,0 +1,38 @@ +{ + "additional_special_tokens": [ + "<|im_start|>", + "<|im_end|>", + "<|action_start|>", + "<|action_end|>", + "<|interpreter|>", + "<|plugin|>" + ], + "bos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "unk_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/tokenization_internlm2.py b/tokenization_internlm2.py new file mode 100644 index 0000000000000000000000000000000000000000..ff53eba214b313c86ef489a823167a7ef2b52c09 --- /dev/null +++ b/tokenization_internlm2.py @@ -0,0 +1,236 @@ +# coding=utf-8 +# Copyright (c) The InternLM team and The HuggingFace Inc. team. All rights reserved. +# +# This code is based on transformers/src/transformers/models/llama/tokenization_llama.py +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
"""Tokenization classes for InternLM."""
import os
from shutil import copyfile
from typing import Any, Dict, List, Optional, Tuple

import sentencepiece as spm
from transformers.tokenization_utils import PreTrainedTokenizer
from transformers.utils import logging

logger = logging.get_logger(__name__)

VOCAB_FILES_NAMES = {"vocab_file": "./tokenizer.model"}

PRETRAINED_VOCAB_FILES_MAP = {}


# Modified from transformers.model.llama.tokenization_llama.LlamaTokenizer
class InternLM2Tokenizer(PreTrainedTokenizer):
    """
    Construct an InternLM2 tokenizer backed by a SentencePiece model.

    Args:
        vocab_file (`str`):
            Path to the SentencePiece model file to load.
        unk_token (`str`, *optional*):
            Unknown token, forwarded to the base tokenizer.
        bos_token (`str`, *optional*):
            Beginning-of-sequence token, forwarded to the base tokenizer.
        eos_token (`str`, *optional*):
            End-of-sequence token, forwarded to the base tokenizer.
        pad_token (`str`, *optional*):
            Padding token, forwarded to the base tokenizer.
        sp_model_kwargs (`Dict[str, Any]`, *optional*):
            Keyword arguments passed to `sentencepiece.SentencePieceProcessor`.
        add_bos_token (`bool`, *optional*, defaults to `True`):
            Whether `build_inputs_with_special_tokens` prepends the BOS id.
        add_eos_token (`bool`, *optional*, defaults to `False`):
            Whether `build_inputs_with_special_tokens` appends the EOS id.
        decode_with_prefix_space (`bool`, *optional*, defaults to `False`):
            Stored on the instance; not read by any method of this class.
        clean_up_tokenization_spaces (`bool`, *optional*, defaults to `False`):
            Forwarded to the base tokenizer.
    """

    vocab_files_names = VOCAB_FILES_NAMES
    pretrained_vocab_files_map = PRETRAINED_VOCAB_FILES_MAP
    model_input_names = ["input_ids", "attention_mask"]
    _auto_class = "AutoTokenizer"

    # NOTE(review): the four special-token defaults below are empty strings in this
    # copy of the file; they look like markup-stripped placeholders (presumably
    # "<unk>" / "<s>" / "</s>") -- confirm against the upstream tokenizer before use.
    def __init__(
        self,
        vocab_file,
        unk_token="",
        bos_token="",
        eos_token="",
        pad_token="",
        sp_model_kwargs: Optional[Dict[str, Any]] = None,
        add_bos_token=True,
        add_eos_token=False,
        decode_with_prefix_space=False,
        clean_up_tokenization_spaces=False,
        **kwargs,
    ):
        # The SentencePiece model and the flags are set up *before* calling the base
        # constructor, exactly as in the original ordering.
        self.sp_model_kwargs = {} if sp_model_kwargs is None else sp_model_kwargs
        self.vocab_file = vocab_file
        self.add_bos_token = add_bos_token
        self.add_eos_token = add_eos_token
        self.decode_with_prefix_space = decode_with_prefix_space
        self.sp_model = spm.SentencePieceProcessor(**self.sp_model_kwargs)
        self.sp_model.Load(vocab_file)
        self._no_prefix_space_tokens = None  # lazily filled by `no_prefix_space_tokens`
        super().__init__(
            bos_token=bos_token,
            eos_token=eos_token,
            unk_token=unk_token,
            pad_token=pad_token,
            clean_up_tokenization_spaces=clean_up_tokenization_spaces,
            **kwargs,
        )

    @property
    def no_prefix_space_tokens(self):
        """Set of vocabulary *ids* whose piece does not start with "▁" (computed once, then cached)."""
        if self._no_prefix_space_tokens is None:
            vocab = self.convert_ids_to_tokens(list(range(self.vocab_size)))
            self._no_prefix_space_tokens = {i for i, tok in enumerate(vocab) if not tok.startswith("▁")}
        return self._no_prefix_space_tokens

    @property
    def vocab_size(self):
        """Returns vocab size"""
        return self.sp_model.get_piece_size()

    @property
    def bos_token_id(self) -> Optional[int]:
        """BOS id as reported by the SentencePiece model."""
        return self.sp_model.bos_id()

    @property
    def eos_token_id(self) -> Optional[int]:
        """EOS id as reported by the SentencePiece model."""
        return self.sp_model.eos_id()

    def get_vocab(self):
        """Returns vocab as a dict"""
        vocab = {self.convert_ids_to_tokens(i): i for i in range(self.vocab_size)}
        vocab.update(self.added_tokens_encoder)
        return vocab

    def _tokenize(self, text):
        """Returns a tokenized string."""
        return self.sp_model.encode(text, out_type=str)

    def _convert_token_to_id(self, token):
        """Converts a token (str) to an id using the vocab."""
        return self.sp_model.piece_to_id(token)

    def _convert_id_to_token(self, index):
        """Converts an index (integer) to a token (str) using the vocab."""
        return self.sp_model.IdToPiece(index)

    def _maybe_add_prefix_space(self, tokens, decoded):
        """Prepend a space to `decoded` unless `tokens[0]` is in `no_prefix_space_tokens`."""
        # NOTE(review): `no_prefix_space_tokens` holds integer ids, while
        # `convert_tokens_to_string` passes token *strings* here, so for a non-empty
        # `tokens` the membership test does not match and the space is added. The
        # caller then removes one leading character with `[1:]`. Left unchanged on
        # purpose: "fixing" only one side would chop the first decoded character.
        if tokens and tokens[0] not in self.no_prefix_space_tokens:
            return " " + decoded
        return decoded

    def convert_tokens_to_string(self, tokens):
        """Converts a sequence of tokens (string) in a single string."""
        # Hoisted: `all_special_tokens` is a property, no need to rebuild it per token.
        special_tokens = set(self.all_special_tokens)
        pieces = []
        pending_sub_tokens = []
        prev_is_special = False
        for token in tokens:
            # make sure that special tokens are not decoded using sentencepiece model
            if token in special_tokens:
                if not prev_is_special:
                    pieces.append(" ")
                pieces.append(self.sp_model.decode(pending_sub_tokens) + token)
                prev_is_special = True
                pending_sub_tokens = []
            else:
                pending_sub_tokens.append(token)
                prev_is_special = False
        pieces.append(self.sp_model.decode(pending_sub_tokens))
        out_string = self.clean_up_tokenization("".join(pieces))
        out_string = self._maybe_add_prefix_space(tokens=tokens, decoded=out_string)
        # Drops the single leading character (see the note in `_maybe_add_prefix_space`).
        return out_string[1:]

    def save_vocabulary(self, save_directory, filename_prefix: Optional[str] = None) -> Tuple[str]:
        """
        Save the vocabulary and special tokens file to a directory.

        Args:
            save_directory (`str`):
                The directory in which to save the vocabulary.
            filename_prefix (`str`, *optional*):
                Prefix joined to the vocabulary file name with a `-`.

        Returns:
            `Tuple(str)`: Paths to the files saved. If `save_directory` is not a directory, an error is logged
            and `None` is returned instead.
        """
        if not os.path.isdir(save_directory):
            logger.error(f"Vocabulary path ({save_directory}) should be a directory")
            return
        # Use only the file name: the configured value is "./tokenizer.model", and
        # prefixing that verbatim would yield "<prefix>-./tokenizer.model", i.e. a path
        # inside a non-existent "<prefix>-." directory.
        vocab_name = os.path.basename(VOCAB_FILES_NAMES["vocab_file"])
        prefix = filename_prefix + "-" if filename_prefix else ""
        out_vocab_file = os.path.join(save_directory, prefix + vocab_name)

        if os.path.abspath(self.vocab_file) != os.path.abspath(out_vocab_file) and os.path.isfile(self.vocab_file):
            copyfile(self.vocab_file, out_vocab_file)
        elif not os.path.isfile(self.vocab_file):
            # The original model file is gone: re-serialize the loaded model instead.
            with open(out_vocab_file, "wb") as fi:
                content_spiece_model = self.sp_model.serialized_model_proto()
                fi.write(content_spiece_model)

        return (out_vocab_file,)

    def build_inputs_with_special_tokens(self, token_ids_0, token_ids_1=None):
        """
        Concatenate one or two id sequences, adding BOS/EOS according to `add_bos_token` / `add_eos_token`.

        The layout is `[bos] + token_ids_0 + token_ids_1 + [eos]`; no separator is inserted between the two
        sequences.
        """
        bos_token_ids = [self.bos_token_id] if self.add_bos_token else []

        output = bos_token_ids + token_ids_0

        if token_ids_1 is not None:
            output = output + token_ids_1

        if self.add_eos_token:
            output = output + [self.eos_token_id]

        return output

    def get_special_tokens_mask(
        self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None, already_has_special_tokens: bool = False
    ) -> List[int]:
        """
        Retrieve sequence ids from a token list that has no special tokens added. This method is called when adding
        special tokens using the tokenizer `prepare_for_model` method.

        The mask mirrors the layout produced by `build_inputs_with_special_tokens`, so it has the same length as
        the ids that method returns.

        Args:
            token_ids_0 (`List[int]`):
                List of IDs.
            token_ids_1 (`List[int]`, *optional*):
                Optional second list of IDs for sequence pairs.
            already_has_special_tokens (`bool`, *optional*, defaults to `False`):
                Whether or not the token list is already formatted with special tokens for the model.

        Returns:
            `List[int]`: A list of integers in the range [0, 1]: 1 for a special token, 0 for a sequence token.
        """
        if already_has_special_tokens:
            return super().get_special_tokens_mask(
                token_ids_0=token_ids_0, token_ids_1=token_ids_1, already_has_special_tokens=True
            )

        bos_mask = [1] if self.add_bos_token else []
        eos_mask = [1] if self.add_eos_token else []
        sequence_mask = [0] * len(token_ids_0)
        if token_ids_1 is not None:
            sequence_mask = sequence_mask + [0] * len(token_ids_1)
        return bos_mask + sequence_mask + eos_mask

    def create_token_type_ids_from_sequences(
        self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None
    ) -> List[int]:
        """
        Create the token type ids for one or two sequences. This tokenizer does not distinguish segments, so a
        list of zeros is returned, sized to match the output of `build_inputs_with_special_tokens`.

        Args:
            token_ids_0 (`List[int]`):
                List of IDs.
            token_ids_1 (`List[int]`, *optional*):
                Optional second list of IDs for sequence pairs.

        Returns:
            `List[int]`: List of zeros.
        """
        # Delegate the length computation so the two methods cannot drift apart.
        return [0] * len(self.build_inputs_with_special_tokens(token_ids_0, token_ids_1))
"92538": { + "content": "<|plugin|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "92539": { + "content": "<|interpreter|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "92540": { + "content": "<|action_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "92541": { + "content": "<|action_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "92542": { + "content": "<|im_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "92543": { + "content": "<|im_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "additional_special_tokens": [ + "<|im_start|>", + "<|im_end|>", + "<|action_start|>", + "<|action_end|>", + "<|interpreter|>", + "<|plugin|>" + ], + "auto_map": { + "AutoTokenizer": [ + "tokenization_internlm2.InternLM2Tokenizer", + null + ] + }, + "bos_token": "", + "chat_template": "{{ bos_token }}{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}", + "clean_up_tokenization_spaces": false, + "eos_token": "", + "model_max_length": 1000000000000000019884624838656, + "pad_token": "", + "padding_side": "right", + "tokenizer_class": "InternLM2Tokenizer", + "unk_token": "" +} diff --git a/trainer_state.json b/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..4beb7891b7c059b9b79a20051cbe1682cbb24bcf --- /dev/null +++ b/trainer_state.json @@ -0,0 +1,41659 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 5.0, + "eval_steps": 10000, + "global_step": 5205, + 
"is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": "3.8388e-07", + "loss": 7.8236, + "slid_loss": 7.8236, + "step": 1, + "time": 30.42 + }, + { + "epoch": 0.0, + "learning_rate": "7.6775e-07", + "loss": 7.8141, + "slid_loss": 7.8189, + "step": 2, + "time": 13.68 + }, + { + "epoch": 0.0, + "learning_rate": "1.1516e-06", + "loss": 7.9123, + "slid_loss": 7.85, + "step": 3, + "time": 14.51 + }, + { + "epoch": 0.0, + "learning_rate": "1.5355e-06", + "loss": 8.4664, + "slid_loss": 8.0041, + "step": 4, + "time": 14.12 + }, + { + "epoch": 0.0, + "learning_rate": "1.9194e-06", + "loss": 7.7963, + "slid_loss": 7.9625, + "step": 5, + "time": 11.58 + }, + { + "epoch": 0.01, + "learning_rate": "2.3033e-06", + "loss": 8.1496, + "slid_loss": 7.9937, + "step": 6, + "time": 12.27 + }, + { + "epoch": 0.01, + "learning_rate": "2.6871e-06", + "loss": 7.9949, + "slid_loss": 7.9939, + "step": 7, + "time": 12.94 + }, + { + "epoch": 0.01, + "learning_rate": "3.0710e-06", + "loss": 8.1081, + "slid_loss": 8.0082, + "step": 8, + "time": 14.3 + }, + { + "epoch": 0.01, + "learning_rate": "3.4549e-06", + "loss": 8.0067, + "slid_loss": 8.008, + "step": 9, + "time": 13.35 + }, + { + "epoch": 0.01, + "learning_rate": "3.8388e-06", + "loss": 7.3789, + "slid_loss": 7.9451, + "step": 10, + "time": 13.36 + }, + { + "epoch": 0.01, + "learning_rate": "4.2226e-06", + "loss": 7.1119, + "slid_loss": 7.8694, + "step": 11, + "time": 12.62 + }, + { + "epoch": 0.01, + "learning_rate": "4.6065e-06", + "loss": 7.1156, + "slid_loss": 7.8065, + "step": 12, + "time": 14.07 + }, + { + "epoch": 0.01, + "learning_rate": "4.9904e-06", + "loss": 6.4367, + "slid_loss": 7.7012, + "step": 13, + "time": 12.71 + }, + { + "epoch": 0.01, + "learning_rate": "5.3743e-06", + "loss": 6.3016, + "slid_loss": 7.6012, + "step": 14, + "time": 13.09 + }, + { + "epoch": 0.01, + "learning_rate": "5.7582e-06", + "loss": 6.4107, + 
"slid_loss": 7.5218, + "step": 15, + "time": 13.66 + }, + { + "epoch": 0.02, + "learning_rate": "6.1420e-06", + "loss": 5.4338, + "slid_loss": 7.3913, + "step": 16, + "time": 11.62 + }, + { + "epoch": 0.02, + "learning_rate": "6.5259e-06", + "loss": 5.1555, + "slid_loss": 7.2598, + "step": 17, + "time": 13.01 + }, + { + "epoch": 0.02, + "learning_rate": "6.9098e-06", + "loss": 5.268, + "slid_loss": 7.1492, + "step": 18, + "time": 11.91 + }, + { + "epoch": 0.02, + "learning_rate": "7.2937e-06", + "loss": 5.3034, + "slid_loss": 7.052, + "step": 19, + "time": 12.71 + }, + { + "epoch": 0.02, + "learning_rate": "7.6775e-06", + "loss": 5.1647, + "slid_loss": 6.9577, + "step": 20, + "time": 13.25 + }, + { + "epoch": 0.02, + "learning_rate": "8.0614e-06", + "loss": 4.853, + "slid_loss": 6.8574, + "step": 21, + "time": 11.64 + }, + { + "epoch": 0.02, + "learning_rate": "8.4453e-06", + "loss": 4.031, + "slid_loss": 6.729, + "step": 22, + "time": 13.73 + }, + { + "epoch": 0.02, + "learning_rate": "8.8292e-06", + "loss": 4.4817, + "slid_loss": 6.6312, + "step": 23, + "time": 11.47 + }, + { + "epoch": 0.02, + "learning_rate": "9.2131e-06", + "loss": 3.9349, + "slid_loss": 6.5189, + "step": 24, + "time": 13.88 + }, + { + "epoch": 0.02, + "learning_rate": "9.5969e-06", + "loss": 3.6992, + "slid_loss": 6.4061, + "step": 25, + "time": 11.17 + }, + { + "epoch": 0.02, + "learning_rate": "9.9808e-06", + "loss": 3.3853, + "slid_loss": 6.2899, + "step": 26, + "time": 13.64 + }, + { + "epoch": 0.03, + "learning_rate": "1.0365e-05", + "loss": 3.2214, + "slid_loss": 6.1763, + "step": 27, + "time": 13.83 + }, + { + "epoch": 0.03, + "learning_rate": "1.0749e-05", + "loss": 2.7486, + "slid_loss": 6.0539, + "step": 28, + "time": 13.78 + }, + { + "epoch": 0.03, + "learning_rate": "1.1132e-05", + "loss": 3.111, + "slid_loss": 5.9524, + "step": 29, + "time": 14.31 + }, + { + "epoch": 0.03, + "learning_rate": "1.1516e-05", + "loss": 3.0806, + "slid_loss": 5.8566, + "step": 30, + "time": 12.18 + }, 
+ { + "epoch": 0.03, + "learning_rate": "1.1900e-05", + "loss": 2.5142, + "slid_loss": 5.7488, + "step": 31, + "time": 13.03 + }, + { + "epoch": 0.03, + "learning_rate": "1.2284e-05", + "loss": 2.3965, + "slid_loss": 5.6441, + "step": 32, + "time": 13.99 + }, + { + "epoch": 0.03, + "learning_rate": "1.2668e-05", + "loss": 2.3281, + "slid_loss": 5.5436, + "step": 33, + "time": 13.43 + }, + { + "epoch": 0.03, + "learning_rate": "1.3052e-05", + "loss": 2.4237, + "slid_loss": 5.4518, + "step": 34, + "time": 13.29 + }, + { + "epoch": 0.03, + "learning_rate": "1.3436e-05", + "loss": 2.3792, + "slid_loss": 5.364, + "step": 35, + "time": 13.98 + }, + { + "epoch": 0.03, + "learning_rate": "1.3820e-05", + "loss": 2.2186, + "slid_loss": 5.2767, + "step": 36, + "time": 14.22 + }, + { + "epoch": 0.04, + "learning_rate": "1.4203e-05", + "loss": 2.1564, + "slid_loss": 5.1923, + "step": 37, + "time": 12.22 + }, + { + "epoch": 0.04, + "learning_rate": "1.4587e-05", + "loss": 1.8433, + "slid_loss": 5.1042, + "step": 38, + "time": 13.66 + }, + { + "epoch": 0.04, + "learning_rate": "1.4971e-05", + "loss": 1.9437, + "slid_loss": 5.0232, + "step": 39, + "time": 15.1 + }, + { + "epoch": 0.04, + "learning_rate": "1.5355e-05", + "loss": 1.9369, + "slid_loss": 4.946, + "step": 40, + "time": 13.07 + }, + { + "epoch": 0.04, + "learning_rate": "1.5739e-05", + "loss": 1.612, + "slid_loss": 4.8647, + "step": 41, + "time": 11.44 + }, + { + "epoch": 0.04, + "learning_rate": "1.6123e-05", + "loss": 1.6152, + "slid_loss": 4.7873, + "step": 42, + "time": 13.72 + }, + { + "epoch": 0.04, + "learning_rate": "1.6507e-05", + "loss": 1.6263, + "slid_loss": 4.7138, + "step": 43, + "time": 11.49 + }, + { + "epoch": 0.04, + "learning_rate": "1.6891e-05", + "loss": 1.4779, + "slid_loss": 4.6403, + "step": 44, + "time": 13.17 + }, + { + "epoch": 0.04, + "learning_rate": "1.7274e-05", + "loss": 1.5903, + "slid_loss": 4.5725, + "step": 45, + "time": 13.66 + }, + { + "epoch": 0.04, + "learning_rate": "1.7658e-05", 
+ "loss": 1.4481, + "slid_loss": 4.5046, + "step": 46, + "time": 13.07 + }, + { + "epoch": 0.05, + "learning_rate": "1.8042e-05", + "loss": 1.4874, + "slid_loss": 4.4404, + "step": 47, + "time": 13.28 + }, + { + "epoch": 0.05, + "learning_rate": "1.8426e-05", + "loss": 1.3457, + "slid_loss": 4.3759, + "step": 48, + "time": 13.65 + }, + { + "epoch": 0.05, + "learning_rate": "1.8810e-05", + "loss": 1.4239, + "slid_loss": 4.3156, + "step": 49, + "time": 13.19 + }, + { + "epoch": 0.05, + "learning_rate": "1.9194e-05", + "loss": 1.2666, + "slid_loss": 4.2547, + "step": 50, + "time": 13.28 + }, + { + "epoch": 0.05, + "learning_rate": "1.9578e-05", + "loss": 1.2803, + "slid_loss": 4.1963, + "step": 51, + "time": 13.66 + }, + { + "epoch": 0.05, + "learning_rate": "1.9962e-05", + "loss": 1.4732, + "slid_loss": 4.144, + "step": 52, + "time": 12.77 + }, + { + "epoch": 0.05, + "learning_rate": "2.0345e-05", + "loss": 1.2897, + "slid_loss": 4.0901, + "step": 53, + "time": 13.85 + }, + { + "epoch": 0.05, + "learning_rate": "2.0729e-05", + "loss": 1.3271, + "slid_loss": 4.039, + "step": 54, + "time": 13.62 + }, + { + "epoch": 0.05, + "learning_rate": "2.1113e-05", + "loss": 1.3042, + "slid_loss": 3.9892, + "step": 55, + "time": 12.89 + }, + { + "epoch": 0.05, + "learning_rate": "2.1497e-05", + "loss": 1.2658, + "slid_loss": 3.9406, + "step": 56, + "time": 13.93 + }, + { + "epoch": 0.05, + "learning_rate": "2.1881e-05", + "loss": 1.2915, + "slid_loss": 3.8941, + "step": 57, + "time": 14.48 + }, + { + "epoch": 0.06, + "learning_rate": "2.2265e-05", + "loss": 1.2305, + "slid_loss": 3.8482, + "step": 58, + "time": 13.53 + }, + { + "epoch": 0.06, + "learning_rate": "2.2649e-05", + "loss": 1.2974, + "slid_loss": 3.805, + "step": 59, + "time": 11.61 + }, + { + "epoch": 0.06, + "learning_rate": "2.3033e-05", + "loss": 1.2902, + "slid_loss": 3.7631, + "step": 60, + "time": 14.14 + }, + { + "epoch": 0.06, + "learning_rate": "2.3417e-05", + "loss": 1.3371, + "slid_loss": 3.7233, + "step": 
61, + "time": 13.32 + }, + { + "epoch": 0.06, + "learning_rate": "2.3800e-05", + "loss": 1.3215, + "slid_loss": 3.6845, + "step": 62, + "time": 13.18 + }, + { + "epoch": 0.06, + "learning_rate": "2.4184e-05", + "loss": 1.2516, + "slid_loss": 3.6459, + "step": 63, + "time": 13.15 + }, + { + "epoch": 0.06, + "learning_rate": "2.4568e-05", + "loss": 1.2789, + "slid_loss": 3.6089, + "step": 64, + "time": 11.21 + }, + { + "epoch": 0.06, + "learning_rate": "2.4952e-05", + "loss": 1.2642, + "slid_loss": 3.5729, + "step": 65, + "time": 14.3 + }, + { + "epoch": 0.06, + "learning_rate": "2.5336e-05", + "loss": 1.277, + "slid_loss": 3.5381, + "step": 66, + "time": 13.22 + }, + { + "epoch": 0.06, + "learning_rate": "2.5720e-05", + "loss": 1.4256, + "slid_loss": 3.5066, + "step": 67, + "time": 13.47 + }, + { + "epoch": 0.07, + "learning_rate": "2.6104e-05", + "loss": 1.2143, + "slid_loss": 3.4728, + "step": 68, + "time": 12.97 + }, + { + "epoch": 0.07, + "learning_rate": "2.6488e-05", + "loss": 1.1297, + "slid_loss": 3.4389, + "step": 69, + "time": 13.74 + }, + { + "epoch": 0.07, + "learning_rate": "2.6871e-05", + "loss": 1.2613, + "slid_loss": 3.4078, + "step": 70, + "time": 13.64 + }, + { + "epoch": 0.07, + "learning_rate": "2.7255e-05", + "loss": 1.1756, + "slid_loss": 3.3763, + "step": 71, + "time": 14.4 + }, + { + "epoch": 0.07, + "learning_rate": "2.7639e-05", + "loss": 1.2345, + "slid_loss": 3.3466, + "step": 72, + "time": 12.93 + }, + { + "epoch": 0.07, + "learning_rate": "2.8023e-05", + "loss": 1.1747, + "slid_loss": 3.3168, + "step": 73, + "time": 11.38 + }, + { + "epoch": 0.07, + "learning_rate": "2.8407e-05", + "loss": 1.1257, + "slid_loss": 3.2872, + "step": 74, + "time": 13.68 + }, + { + "epoch": 0.07, + "learning_rate": "2.8791e-05", + "loss": 1.3381, + "slid_loss": 3.2612, + "step": 75, + "time": 12.83 + }, + { + "epoch": 0.07, + "learning_rate": "2.9175e-05", + "loss": 1.1701, + "slid_loss": 3.2337, + "step": 76, + "time": 13.48 + }, + { + "epoch": 0.07, + 
"learning_rate": "2.9559e-05", + "loss": 1.1704, + "slid_loss": 3.2069, + "step": 77, + "time": 13.79 + }, + { + "epoch": 0.07, + "learning_rate": "2.9942e-05", + "loss": 1.2441, + "slid_loss": 3.1818, + "step": 78, + "time": 12.22 + }, + { + "epoch": 0.08, + "learning_rate": "3.0326e-05", + "loss": 1.2319, + "slid_loss": 3.1571, + "step": 79, + "time": 11.8 + }, + { + "epoch": 0.08, + "learning_rate": "3.0710e-05", + "loss": 1.2055, + "slid_loss": 3.1327, + "step": 80, + "time": 13.84 + }, + { + "epoch": 0.08, + "learning_rate": "3.1094e-05", + "loss": 1.1839, + "slid_loss": 3.1086, + "step": 81, + "time": 13.38 + }, + { + "epoch": 0.08, + "learning_rate": "3.1478e-05", + "loss": 1.2603, + "slid_loss": 3.0861, + "step": 82, + "time": 13.51 + }, + { + "epoch": 0.08, + "learning_rate": "3.1862e-05", + "loss": 1.0705, + "slid_loss": 3.0618, + "step": 83, + "time": 13.16 + }, + { + "epoch": 0.08, + "learning_rate": "3.2246e-05", + "loss": 1.236, + "slid_loss": 3.0401, + "step": 84, + "time": 13.83 + }, + { + "epoch": 0.08, + "learning_rate": "3.2630e-05", + "loss": 1.2927, + "slid_loss": 3.0195, + "step": 85, + "time": 13.91 + }, + { + "epoch": 0.08, + "learning_rate": "3.3013e-05", + "loss": 1.1653, + "slid_loss": 2.9979, + "step": 86, + "time": 13.88 + }, + { + "epoch": 0.08, + "learning_rate": "3.3397e-05", + "loss": 1.1917, + "slid_loss": 2.9772, + "step": 87, + "time": 13.44 + }, + { + "epoch": 0.08, + "learning_rate": "3.3781e-05", + "loss": 1.0867, + "slid_loss": 2.9557, + "step": 88, + "time": 13.41 + }, + { + "epoch": 0.09, + "learning_rate": "3.4165e-05", + "loss": 1.3115, + "slid_loss": 2.9372, + "step": 89, + "time": 13.74 + }, + { + "epoch": 0.09, + "learning_rate": "3.4549e-05", + "loss": 1.1756, + "slid_loss": 2.9177, + "step": 90, + "time": 13.91 + }, + { + "epoch": 0.09, + "learning_rate": "3.4933e-05", + "loss": 1.1516, + "slid_loss": 2.8982, + "step": 91, + "time": 12.35 + }, + { + "epoch": 0.09, + "learning_rate": "3.5317e-05", + "loss": 1.2829, + 
"slid_loss": 2.8807, + "step": 92, + "time": 13.4 + }, + { + "epoch": 0.09, + "learning_rate": "3.5701e-05", + "loss": 1.1734, + "slid_loss": 2.8623, + "step": 93, + "time": 13.1 + }, + { + "epoch": 0.09, + "learning_rate": "3.6084e-05", + "loss": 1.168, + "slid_loss": 2.8443, + "step": 94, + "time": 13.42 + }, + { + "epoch": 0.09, + "learning_rate": "3.6468e-05", + "loss": 1.1861, + "slid_loss": 2.8269, + "step": 95, + "time": 13.57 + }, + { + "epoch": 0.09, + "learning_rate": "3.6852e-05", + "loss": 1.1554, + "slid_loss": 2.8094, + "step": 96, + "time": 12.92 + }, + { + "epoch": 0.09, + "learning_rate": "3.7236e-05", + "loss": 1.0486, + "slid_loss": 2.7913, + "step": 97, + "time": 11.1 + }, + { + "epoch": 0.09, + "learning_rate": "3.7620e-05", + "loss": 1.1777, + "slid_loss": 2.7748, + "step": 98, + "time": 12.76 + }, + { + "epoch": 0.1, + "learning_rate": "3.8004e-05", + "loss": 1.0456, + "slid_loss": 2.7574, + "step": 99, + "time": 12.89 + }, + { + "epoch": 0.1, + "learning_rate": "3.8388e-05", + "loss": 1.1834, + "slid_loss": 2.7416, + "step": 100, + "time": 11.2 + }, + { + "epoch": 0.1, + "learning_rate": "3.8772e-05", + "loss": 1.2001, + "slid_loss": 2.6754, + "step": 101, + "time": 11.55 + }, + { + "epoch": 0.1, + "learning_rate": "3.9155e-05", + "loss": 1.1745, + "slid_loss": 2.609, + "step": 102, + "time": 13.0 + }, + { + "epoch": 0.1, + "learning_rate": "3.9539e-05", + "loss": 1.135, + "slid_loss": 2.5412, + "step": 103, + "time": 13.79 + }, + { + "epoch": 0.1, + "learning_rate": "3.9923e-05", + "loss": 1.3012, + "slid_loss": 2.4696, + "step": 104, + "time": 12.68 + }, + { + "epoch": 0.1, + "learning_rate": "4.0307e-05", + "loss": 1.0936, + "slid_loss": 2.4025, + "step": 105, + "time": 13.53 + }, + { + "epoch": 0.1, + "learning_rate": "4.0691e-05", + "loss": 1.3079, + "slid_loss": 2.3341, + "step": 106, + "time": 11.93 + }, + { + "epoch": 0.1, + "learning_rate": "4.1075e-05", + "loss": 1.2324, + "slid_loss": 2.2665, + "step": 107, + "time": 13.36 + }, + 
{ + "epoch": 0.1, + "learning_rate": "4.1459e-05", + "loss": 1.1724, + "slid_loss": 2.1971, + "step": 108, + "time": 11.59 + }, + { + "epoch": 0.1, + "learning_rate": "4.1843e-05", + "loss": 1.0703, + "slid_loss": 2.1278, + "step": 109, + "time": 13.07 + }, + { + "epoch": 0.11, + "learning_rate": "4.2226e-05", + "loss": 1.103, + "slid_loss": 2.065, + "step": 110, + "time": 13.02 + }, + { + "epoch": 0.11, + "learning_rate": "4.2610e-05", + "loss": 1.2006, + "slid_loss": 2.0059, + "step": 111, + "time": 12.15 + }, + { + "epoch": 0.11, + "learning_rate": "4.2994e-05", + "loss": 1.2, + "slid_loss": 1.9467, + "step": 112, + "time": 11.78 + }, + { + "epoch": 0.11, + "learning_rate": "4.3378e-05", + "loss": 1.1365, + "slid_loss": 1.8937, + "step": 113, + "time": 14.08 + }, + { + "epoch": 0.11, + "learning_rate": "4.3762e-05", + "loss": 1.1729, + "slid_loss": 1.8425, + "step": 114, + "time": 12.99 + }, + { + "epoch": 0.11, + "learning_rate": "4.4146e-05", + "loss": 1.2536, + "slid_loss": 1.7909, + "step": 115, + "time": 13.67 + }, + { + "epoch": 0.11, + "learning_rate": "4.4530e-05", + "loss": 1.1477, + "slid_loss": 1.748, + "step": 116, + "time": 10.8 + }, + { + "epoch": 0.11, + "learning_rate": "4.4914e-05", + "loss": 1.1827, + "slid_loss": 1.7083, + "step": 117, + "time": 14.12 + }, + { + "epoch": 0.11, + "learning_rate": "4.5298e-05", + "loss": 1.0507, + "slid_loss": 1.6661, + "step": 118, + "time": 12.9 + }, + { + "epoch": 0.11, + "learning_rate": "4.5681e-05", + "loss": 1.2179, + "slid_loss": 1.6253, + "step": 119, + "time": 11.94 + }, + { + "epoch": 0.12, + "learning_rate": "4.6065e-05", + "loss": 1.0919, + "slid_loss": 1.5845, + "step": 120, + "time": 13.57 + }, + { + "epoch": 0.12, + "learning_rate": "4.6449e-05", + "loss": 1.1115, + "slid_loss": 1.5471, + "step": 121, + "time": 13.49 + }, + { + "epoch": 0.12, + "learning_rate": "4.6833e-05", + "loss": 1.1144, + "slid_loss": 1.518, + "step": 122, + "time": 13.82 + }, + { + "epoch": 0.12, + "learning_rate": 
"4.7217e-05", + "loss": 1.1727, + "slid_loss": 1.4849, + "step": 123, + "time": 12.95 + }, + { + "epoch": 0.12, + "learning_rate": "4.7601e-05", + "loss": 1.1816, + "slid_loss": 1.4573, + "step": 124, + "time": 13.23 + }, + { + "epoch": 0.12, + "learning_rate": "4.7985e-05", + "loss": 1.1854, + "slid_loss": 1.4322, + "step": 125, + "time": 13.22 + }, + { + "epoch": 0.12, + "learning_rate": "4.8369e-05", + "loss": 1.1493, + "slid_loss": 1.4098, + "step": 126, + "time": 12.84 + }, + { + "epoch": 0.12, + "learning_rate": "4.8752e-05", + "loss": 1.148, + "slid_loss": 1.3891, + "step": 127, + "time": 12.26 + }, + { + "epoch": 0.12, + "learning_rate": "4.9136e-05", + "loss": 1.1457, + "slid_loss": 1.3731, + "step": 128, + "time": 13.94 + }, + { + "epoch": 0.12, + "learning_rate": "4.9520e-05", + "loss": 1.0894, + "slid_loss": 1.3529, + "step": 129, + "time": 11.57 + }, + { + "epoch": 0.12, + "learning_rate": "4.9904e-05", + "loss": 1.1805, + "slid_loss": 1.3339, + "step": 130, + "time": 13.7 + }, + { + "epoch": 0.13, + "learning_rate": "5.0288e-05", + "loss": 1.2985, + "slid_loss": 1.3217, + "step": 131, + "time": 13.13 + }, + { + "epoch": 0.13, + "learning_rate": "5.0672e-05", + "loss": 1.2082, + "slid_loss": 1.3098, + "step": 132, + "time": 11.74 + }, + { + "epoch": 0.13, + "learning_rate": "5.1056e-05", + "loss": 1.1836, + "slid_loss": 1.2984, + "step": 133, + "time": 12.08 + }, + { + "epoch": 0.13, + "learning_rate": "5.1440e-05", + "loss": 1.0953, + "slid_loss": 1.2851, + "step": 134, + "time": 13.17 + }, + { + "epoch": 0.13, + "learning_rate": "5.1823e-05", + "loss": 1.1741, + "slid_loss": 1.273, + "step": 135, + "time": 13.01 + }, + { + "epoch": 0.13, + "learning_rate": "5.2207e-05", + "loss": 1.2184, + "slid_loss": 1.263, + "step": 136, + "time": 12.95 + }, + { + "epoch": 0.13, + "learning_rate": "5.2591e-05", + "loss": 0.9831, + "slid_loss": 1.2513, + "step": 137, + "time": 11.96 + }, + { + "epoch": 0.13, + "learning_rate": "5.2975e-05", + "loss": 1.1949, + 
"slid_loss": 1.2448, + "step": 138, + "time": 13.73 + }, + { + "epoch": 0.13, + "learning_rate": "5.3359e-05", + "loss": 1.0852, + "slid_loss": 1.2362, + "step": 139, + "time": 12.81 + }, + { + "epoch": 0.13, + "learning_rate": "5.3743e-05", + "loss": 1.1562, + "slid_loss": 1.2284, + "step": 140, + "time": 13.78 + }, + { + "epoch": 0.14, + "learning_rate": "5.4127e-05", + "loss": 1.3207, + "slid_loss": 1.2255, + "step": 141, + "time": 14.15 + }, + { + "epoch": 0.14, + "learning_rate": "5.4511e-05", + "loss": 1.2369, + "slid_loss": 1.2217, + "step": 142, + "time": 13.37 + }, + { + "epoch": 0.14, + "learning_rate": "5.4894e-05", + "loss": 1.1405, + "slid_loss": 1.2169, + "step": 143, + "time": 13.59 + }, + { + "epoch": 0.14, + "learning_rate": "5.5278e-05", + "loss": 1.1452, + "slid_loss": 1.2135, + "step": 144, + "time": 13.62 + }, + { + "epoch": 0.14, + "learning_rate": "5.5662e-05", + "loss": 1.3261, + "slid_loss": 1.2109, + "step": 145, + "time": 13.76 + }, + { + "epoch": 0.14, + "learning_rate": "5.6046e-05", + "loss": 1.1188, + "slid_loss": 1.2076, + "step": 146, + "time": 13.53 + }, + { + "epoch": 0.14, + "learning_rate": "5.6430e-05", + "loss": 1.1551, + "slid_loss": 1.2043, + "step": 147, + "time": 12.6 + }, + { + "epoch": 0.14, + "learning_rate": "5.6814e-05", + "loss": 1.1416, + "slid_loss": 1.2022, + "step": 148, + "time": 13.91 + }, + { + "epoch": 0.14, + "learning_rate": "5.7198e-05", + "loss": 1.1112, + "slid_loss": 1.1991, + "step": 149, + "time": 11.75 + }, + { + "epoch": 0.14, + "learning_rate": "5.7582e-05", + "loss": 1.1905, + "slid_loss": 1.1984, + "step": 150, + "time": 12.6 + }, + { + "epoch": 0.15, + "learning_rate": "5.7965e-05", + "loss": 1.1339, + "slid_loss": 1.1969, + "step": 151, + "time": 13.72 + }, + { + "epoch": 0.15, + "learning_rate": "5.8349e-05", + "loss": 1.1841, + "slid_loss": 1.194, + "step": 152, + "time": 13.36 + }, + { + "epoch": 0.15, + "learning_rate": "5.8733e-05", + "loss": 1.1096, + "slid_loss": 1.1922, + "step": 153, + 
"time": 13.26 + }, + { + "epoch": 0.15, + "learning_rate": "5.9117e-05", + "loss": 1.1083, + "slid_loss": 1.19, + "step": 154, + "time": 12.95 + }, + { + "epoch": 0.15, + "learning_rate": "5.9501e-05", + "loss": 1.0981, + "slid_loss": 1.188, + "step": 155, + "time": 12.87 + }, + { + "epoch": 0.15, + "learning_rate": "5.9885e-05", + "loss": 1.0515, + "slid_loss": 1.1858, + "step": 156, + "time": 12.8 + }, + { + "epoch": 0.15, + "learning_rate": "6.0269e-05", + "loss": 1.1457, + "slid_loss": 1.1844, + "step": 157, + "time": 14.29 + }, + { + "epoch": 0.15, + "learning_rate": "6.0653e-05", + "loss": 1.1588, + "slid_loss": 1.1836, + "step": 158, + "time": 12.79 + }, + { + "epoch": 0.15, + "learning_rate": "6.1036e-05", + "loss": 1.141, + "slid_loss": 1.1821, + "step": 159, + "time": 13.76 + }, + { + "epoch": 0.15, + "learning_rate": "6.1420e-05", + "loss": 1.2085, + "slid_loss": 1.1813, + "step": 160, + "time": 12.75 + }, + { + "epoch": 0.15, + "learning_rate": "6.1804e-05", + "loss": 1.0107, + "slid_loss": 1.178, + "step": 161, + "time": 13.2 + }, + { + "epoch": 0.16, + "learning_rate": "6.2188e-05", + "loss": 1.1931, + "slid_loss": 1.1767, + "step": 162, + "time": 13.93 + }, + { + "epoch": 0.16, + "learning_rate": "6.2572e-05", + "loss": 1.1413, + "slid_loss": 1.1756, + "step": 163, + "time": 12.96 + }, + { + "epoch": 0.16, + "learning_rate": "6.2956e-05", + "loss": 1.2096, + "slid_loss": 1.1749, + "step": 164, + "time": 13.31 + }, + { + "epoch": 0.16, + "learning_rate": "6.3340e-05", + "loss": 1.2266, + "slid_loss": 1.1745, + "step": 165, + "time": 13.7 + }, + { + "epoch": 0.16, + "learning_rate": "6.3724e-05", + "loss": 1.1511, + "slid_loss": 1.1733, + "step": 166, + "time": 13.25 + }, + { + "epoch": 0.16, + "learning_rate": "6.4107e-05", + "loss": 1.0903, + "slid_loss": 1.1699, + "step": 167, + "time": 13.46 + }, + { + "epoch": 0.16, + "learning_rate": "6.4491e-05", + "loss": 1.1374, + "slid_loss": 1.1692, + "step": 168, + "time": 13.69 + }, + { + "epoch": 0.16, + 
"learning_rate": "6.4875e-05", + "loss": 1.134, + "slid_loss": 1.1692, + "step": 169, + "time": 12.1 + }, + { + "epoch": 0.16, + "learning_rate": "6.5259e-05", + "loss": 1.2274, + "slid_loss": 1.1689, + "step": 170, + "time": 14.0 + }, + { + "epoch": 0.16, + "learning_rate": "6.5643e-05", + "loss": 1.0998, + "slid_loss": 1.1681, + "step": 171, + "time": 13.25 + }, + { + "epoch": 0.17, + "learning_rate": "6.6027e-05", + "loss": 1.1588, + "slid_loss": 1.1673, + "step": 172, + "time": 12.94 + }, + { + "epoch": 0.17, + "learning_rate": "6.6411e-05", + "loss": 1.1034, + "slid_loss": 1.1666, + "step": 173, + "time": 12.84 + }, + { + "epoch": 0.17, + "learning_rate": "6.6795e-05", + "loss": 1.1018, + "slid_loss": 1.1664, + "step": 174, + "time": 12.14 + }, + { + "epoch": 0.17, + "learning_rate": "6.7179e-05", + "loss": 1.2207, + "slid_loss": 1.1652, + "step": 175, + "time": 13.79 + }, + { + "epoch": 0.17, + "learning_rate": "6.7562e-05", + "loss": 1.1205, + "slid_loss": 1.1647, + "step": 176, + "time": 13.27 + }, + { + "epoch": 0.17, + "learning_rate": "6.7946e-05", + "loss": 1.0301, + "slid_loss": 1.1633, + "step": 177, + "time": 13.4 + }, + { + "epoch": 0.17, + "learning_rate": "6.8330e-05", + "loss": 1.0419, + "slid_loss": 1.1613, + "step": 178, + "time": 14.18 + }, + { + "epoch": 0.17, + "learning_rate": "6.8714e-05", + "loss": 1.0584, + "slid_loss": 1.1596, + "step": 179, + "time": 12.28 + }, + { + "epoch": 0.17, + "learning_rate": "6.9098e-05", + "loss": 1.1984, + "slid_loss": 1.1595, + "step": 180, + "time": 12.81 + }, + { + "epoch": 0.17, + "learning_rate": "6.9482e-05", + "loss": 1.1476, + "slid_loss": 1.1591, + "step": 181, + "time": 12.67 + }, + { + "epoch": 0.17, + "learning_rate": "6.9866e-05", + "loss": 1.0205, + "slid_loss": 1.1567, + "step": 182, + "time": 13.03 + }, + { + "epoch": 0.18, + "learning_rate": "7.0250e-05", + "loss": 1.1188, + "slid_loss": 1.1572, + "step": 183, + "time": 13.11 + }, + { + "epoch": 0.18, + "learning_rate": "7.0633e-05", + 
"loss": 1.1847, + "slid_loss": 1.1567, + "step": 184, + "time": 13.85 + }, + { + "epoch": 0.18, + "learning_rate": "7.1017e-05", + "loss": 1.1538, + "slid_loss": 1.1553, + "step": 185, + "time": 13.11 + }, + { + "epoch": 0.18, + "learning_rate": "7.1401e-05", + "loss": 1.1634, + "slid_loss": 1.1553, + "step": 186, + "time": 13.35 + }, + { + "epoch": 0.18, + "learning_rate": "7.1785e-05", + "loss": 1.0253, + "slid_loss": 1.1536, + "step": 187, + "time": 13.7 + }, + { + "epoch": 0.18, + "learning_rate": "7.2169e-05", + "loss": 1.0978, + "slid_loss": 1.1537, + "step": 188, + "time": 12.95 + }, + { + "epoch": 0.18, + "learning_rate": "7.2553e-05", + "loss": 1.1685, + "slid_loss": 1.1523, + "step": 189, + "time": 13.35 + }, + { + "epoch": 0.18, + "learning_rate": "7.2937e-05", + "loss": 1.1842, + "slid_loss": 1.1524, + "step": 190, + "time": 13.56 + }, + { + "epoch": 0.18, + "learning_rate": "7.3321e-05", + "loss": 1.1207, + "slid_loss": 1.1521, + "step": 191, + "time": 12.95 + }, + { + "epoch": 0.18, + "learning_rate": "7.3704e-05", + "loss": 1.1619, + "slid_loss": 1.1509, + "step": 192, + "time": 13.46 + }, + { + "epoch": 0.19, + "learning_rate": "7.4088e-05", + "loss": 1.1596, + "slid_loss": 1.1507, + "step": 193, + "time": 14.22 + }, + { + "epoch": 0.19, + "learning_rate": "7.4472e-05", + "loss": 1.1408, + "slid_loss": 1.1505, + "step": 194, + "time": 13.57 + }, + { + "epoch": 0.19, + "learning_rate": "7.4856e-05", + "loss": 1.2049, + "slid_loss": 1.1507, + "step": 195, + "time": 13.25 + }, + { + "epoch": 0.19, + "learning_rate": "7.5240e-05", + "loss": 1.192, + "slid_loss": 1.151, + "step": 196, + "time": 13.77 + }, + { + "epoch": 0.19, + "learning_rate": "7.5624e-05", + "loss": 1.05, + "slid_loss": 1.151, + "step": 197, + "time": 13.84 + }, + { + "epoch": 0.19, + "learning_rate": "7.6008e-05", + "loss": 1.1504, + "slid_loss": 1.1508, + "step": 198, + "time": 12.89 + }, + { + "epoch": 0.19, + "learning_rate": "7.6392e-05", + "loss": 1.1557, + "slid_loss": 1.1519, + 
"step": 199, + "time": 11.6 + }, + { + "epoch": 0.19, + "learning_rate": "7.6775e-05", + "loss": 1.1347, + "slid_loss": 1.1514, + "step": 200, + "time": 14.39 + }, + { + "epoch": 0.19, + "learning_rate": "7.7159e-05", + "loss": 1.0396, + "slid_loss": 1.1498, + "step": 201, + "time": 13.35 + }, + { + "epoch": 0.19, + "learning_rate": "7.7543e-05", + "loss": 1.0315, + "slid_loss": 1.1483, + "step": 202, + "time": 12.04 + }, + { + "epoch": 0.2, + "learning_rate": "7.7927e-05", + "loss": 1.0682, + "slid_loss": 1.1477, + "step": 203, + "time": 13.47 + }, + { + "epoch": 0.2, + "learning_rate": "7.8311e-05", + "loss": 1.0855, + "slid_loss": 1.1455, + "step": 204, + "time": 12.87 + }, + { + "epoch": 0.2, + "learning_rate": "7.8695e-05", + "loss": 1.2007, + "slid_loss": 1.1466, + "step": 205, + "time": 13.74 + }, + { + "epoch": 0.2, + "learning_rate": "7.9079e-05", + "loss": 1.1677, + "slid_loss": 1.1452, + "step": 206, + "time": 12.92 + }, + { + "epoch": 0.2, + "learning_rate": "7.9463e-05", + "loss": 1.225, + "slid_loss": 1.1451, + "step": 207, + "time": 12.72 + }, + { + "epoch": 0.2, + "learning_rate": "7.9846e-05", + "loss": 1.0762, + "slid_loss": 1.1441, + "step": 208, + "time": 13.34 + }, + { + "epoch": 0.2, + "learning_rate": "8.0230e-05", + "loss": 1.2651, + "slid_loss": 1.1461, + "step": 209, + "time": 11.94 + }, + { + "epoch": 0.2, + "learning_rate": "8.0614e-05", + "loss": 1.2334, + "slid_loss": 1.1474, + "step": 210, + "time": 13.82 + }, + { + "epoch": 0.2, + "learning_rate": "8.0998e-05", + "loss": 1.1817, + "slid_loss": 1.1472, + "step": 211, + "time": 13.56 + }, + { + "epoch": 0.2, + "learning_rate": "8.1382e-05", + "loss": 1.1834, + "slid_loss": 1.147, + "step": 212, + "time": 12.14 + }, + { + "epoch": 0.2, + "learning_rate": "8.1766e-05", + "loss": 1.0714, + "slid_loss": 1.1464, + "step": 213, + "time": 13.21 + }, + { + "epoch": 0.21, + "learning_rate": "8.2150e-05", + "loss": 1.1633, + "slid_loss": 1.1463, + "step": 214, + "time": 13.95 + }, + { + "epoch": 
0.21, + "learning_rate": "8.2534e-05", + "loss": 1.1507, + "slid_loss": 1.1453, + "step": 215, + "time": 13.2 + }, + { + "epoch": 0.21, + "learning_rate": "8.2917e-05", + "loss": 1.1519, + "slid_loss": 1.1453, + "step": 216, + "time": 13.49 + }, + { + "epoch": 0.21, + "learning_rate": "8.3301e-05", + "loss": 1.2125, + "slid_loss": 1.1456, + "step": 217, + "time": 14.76 + }, + { + "epoch": 0.21, + "learning_rate": "8.3685e-05", + "loss": 1.178, + "slid_loss": 1.1469, + "step": 218, + "time": 13.13 + }, + { + "epoch": 0.21, + "learning_rate": "8.4069e-05", + "loss": 1.1489, + "slid_loss": 1.1462, + "step": 219, + "time": 13.25 + }, + { + "epoch": 0.21, + "learning_rate": "8.4453e-05", + "loss": 1.0851, + "slid_loss": 1.1461, + "step": 220, + "time": 13.56 + }, + { + "epoch": 0.21, + "learning_rate": "8.4837e-05", + "loss": 1.1002, + "slid_loss": 1.146, + "step": 221, + "time": 14.01 + }, + { + "epoch": 0.21, + "learning_rate": "8.5221e-05", + "loss": 1.1486, + "slid_loss": 1.1464, + "step": 222, + "time": 14.01 + }, + { + "epoch": 0.21, + "learning_rate": "8.5605e-05", + "loss": 1.2666, + "slid_loss": 1.1473, + "step": 223, + "time": 14.16 + }, + { + "epoch": 0.22, + "learning_rate": "8.5988e-05", + "loss": 0.9827, + "slid_loss": 1.1453, + "step": 224, + "time": 13.3 + }, + { + "epoch": 0.22, + "learning_rate": "8.6372e-05", + "loss": 1.2495, + "slid_loss": 1.1459, + "step": 225, + "time": 11.82 + }, + { + "epoch": 0.22, + "learning_rate": "8.6756e-05", + "loss": 1.0493, + "slid_loss": 1.1449, + "step": 226, + "time": 13.01 + }, + { + "epoch": 0.22, + "learning_rate": "8.7140e-05", + "loss": 1.1185, + "slid_loss": 1.1446, + "step": 227, + "time": 13.92 + }, + { + "epoch": 0.22, + "learning_rate": "8.7524e-05", + "loss": 1.1107, + "slid_loss": 1.1443, + "step": 228, + "time": 11.21 + }, + { + "epoch": 0.22, + "learning_rate": "8.7908e-05", + "loss": 1.1272, + "slid_loss": 1.1447, + "step": 229, + "time": 12.47 + }, + { + "epoch": 0.22, + "learning_rate": "8.8292e-05", 
+ "loss": 1.1637, + "slid_loss": 1.1445, + "step": 230, + "time": 13.36 + }, + { + "epoch": 0.22, + "learning_rate": "8.8676e-05", + "loss": 1.1073, + "slid_loss": 1.1426, + "step": 231, + "time": 13.91 + }, + { + "epoch": 0.22, + "learning_rate": "8.9060e-05", + "loss": 1.1314, + "slid_loss": 1.1418, + "step": 232, + "time": 11.91 + }, + { + "epoch": 0.22, + "learning_rate": "8.9443e-05", + "loss": 1.1864, + "slid_loss": 1.1419, + "step": 233, + "time": 13.29 + }, + { + "epoch": 0.22, + "learning_rate": "8.9827e-05", + "loss": 1.161, + "slid_loss": 1.1425, + "step": 234, + "time": 11.8 + }, + { + "epoch": 0.23, + "learning_rate": "9.0211e-05", + "loss": 1.2014, + "slid_loss": 1.1428, + "step": 235, + "time": 13.69 + }, + { + "epoch": 0.23, + "learning_rate": "9.0595e-05", + "loss": 1.1488, + "slid_loss": 1.1421, + "step": 236, + "time": 13.2 + }, + { + "epoch": 0.23, + "learning_rate": "9.0979e-05", + "loss": 1.1874, + "slid_loss": 1.1441, + "step": 237, + "time": 13.69 + }, + { + "epoch": 0.23, + "learning_rate": "9.1363e-05", + "loss": 1.2081, + "slid_loss": 1.1443, + "step": 238, + "time": 12.94 + }, + { + "epoch": 0.23, + "learning_rate": "9.1747e-05", + "loss": 1.1443, + "slid_loss": 1.1449, + "step": 239, + "time": 13.6 + }, + { + "epoch": 0.23, + "learning_rate": "9.2131e-05", + "loss": 1.1631, + "slid_loss": 1.1449, + "step": 240, + "time": 13.79 + }, + { + "epoch": 0.23, + "learning_rate": "9.2514e-05", + "loss": 1.1786, + "slid_loss": 1.1435, + "step": 241, + "time": 13.67 + }, + { + "epoch": 0.23, + "learning_rate": "9.2898e-05", + "loss": 1.1789, + "slid_loss": 1.1429, + "step": 242, + "time": 13.09 + }, + { + "epoch": 0.23, + "learning_rate": "9.3282e-05", + "loss": 1.1783, + "slid_loss": 1.1433, + "step": 243, + "time": 13.19 + }, + { + "epoch": 0.23, + "learning_rate": "9.3666e-05", + "loss": 1.1004, + "slid_loss": 1.1429, + "step": 244, + "time": 13.39 + }, + { + "epoch": 0.24, + "learning_rate": "9.4050e-05", + "loss": 1.1534, + "slid_loss": 
1.1411, + "step": 245, + "time": 13.65 + }, + { + "epoch": 0.24, + "learning_rate": "9.4434e-05", + "loss": 1.1435, + "slid_loss": 1.1414, + "step": 246, + "time": 13.73 + }, + { + "epoch": 0.24, + "learning_rate": "9.4818e-05", + "loss": 0.9677, + "slid_loss": 1.1395, + "step": 247, + "time": 11.06 + }, + { + "epoch": 0.24, + "learning_rate": "9.5202e-05", + "loss": 1.0647, + "slid_loss": 1.1387, + "step": 248, + "time": 14.1 + }, + { + "epoch": 0.24, + "learning_rate": "9.5585e-05", + "loss": 1.2235, + "slid_loss": 1.1399, + "step": 249, + "time": 11.99 + }, + { + "epoch": 0.24, + "learning_rate": "9.5969e-05", + "loss": 1.1393, + "slid_loss": 1.1393, + "step": 250, + "time": 13.29 + }, + { + "epoch": 0.24, + "learning_rate": "9.6353e-05", + "loss": 1.0045, + "slid_loss": 1.138, + "step": 251, + "time": 13.85 + }, + { + "epoch": 0.24, + "learning_rate": "9.6737e-05", + "loss": 1.1903, + "slid_loss": 1.1381, + "step": 252, + "time": 13.62 + }, + { + "epoch": 0.24, + "learning_rate": "9.7121e-05", + "loss": 1.0955, + "slid_loss": 1.138, + "step": 253, + "time": 14.07 + }, + { + "epoch": 0.24, + "learning_rate": "9.7505e-05", + "loss": 1.1831, + "slid_loss": 1.1387, + "step": 254, + "time": 14.16 + }, + { + "epoch": 0.24, + "learning_rate": "9.7889e-05", + "loss": 1.1305, + "slid_loss": 1.139, + "step": 255, + "time": 11.9 + }, + { + "epoch": 0.25, + "learning_rate": "9.8273e-05", + "loss": 1.1394, + "slid_loss": 1.1399, + "step": 256, + "time": 11.29 + }, + { + "epoch": 0.25, + "learning_rate": "9.8656e-05", + "loss": 1.1808, + "slid_loss": 1.1403, + "step": 257, + "time": 13.65 + }, + { + "epoch": 0.25, + "learning_rate": "9.9040e-05", + "loss": 1.2763, + "slid_loss": 1.1414, + "step": 258, + "time": 12.85 + }, + { + "epoch": 0.25, + "learning_rate": "9.9424e-05", + "loss": 1.1336, + "slid_loss": 1.1414, + "step": 259, + "time": 12.79 + }, + { + "epoch": 0.25, + "learning_rate": "9.9808e-05", + "loss": 1.0957, + "slid_loss": 1.1402, + "step": 260, + "time": 13.59 
+ }, + { + "epoch": 0.25, + "learning_rate": "1.0019e-04", + "loss": 1.2769, + "slid_loss": 1.1429, + "step": 261, + "time": 11.94 + }, + { + "epoch": 0.25, + "learning_rate": "1.0058e-04", + "loss": 1.1852, + "slid_loss": 1.1428, + "step": 262, + "time": 12.92 + }, + { + "epoch": 0.25, + "learning_rate": "1.0096e-04", + "loss": 1.1485, + "slid_loss": 1.1429, + "step": 263, + "time": 11.72 + }, + { + "epoch": 0.25, + "learning_rate": "1.0134e-04", + "loss": 1.1632, + "slid_loss": 1.1424, + "step": 264, + "time": 12.8 + }, + { + "epoch": 0.25, + "learning_rate": "1.0173e-04", + "loss": 1.0556, + "slid_loss": 1.1407, + "step": 265, + "time": 13.66 + }, + { + "epoch": 0.26, + "learning_rate": "1.0211e-04", + "loss": 1.0841, + "slid_loss": 1.1401, + "step": 266, + "time": 12.47 + }, + { + "epoch": 0.26, + "learning_rate": "1.0250e-04", + "loss": 1.1209, + "slid_loss": 1.1404, + "step": 267, + "time": 12.88 + }, + { + "epoch": 0.26, + "learning_rate": "1.0288e-04", + "loss": 1.0899, + "slid_loss": 1.1399, + "step": 268, + "time": 14.07 + }, + { + "epoch": 0.26, + "learning_rate": "1.0326e-04", + "loss": 1.0307, + "slid_loss": 1.1389, + "step": 269, + "time": 13.09 + }, + { + "epoch": 0.26, + "learning_rate": "1.0365e-04", + "loss": 1.0207, + "slid_loss": 1.1368, + "step": 270, + "time": 12.89 + }, + { + "epoch": 0.26, + "learning_rate": "1.0403e-04", + "loss": 1.0853, + "slid_loss": 1.1366, + "step": 271, + "time": 11.4 + }, + { + "epoch": 0.26, + "learning_rate": "1.0441e-04", + "loss": 1.1743, + "slid_loss": 1.1368, + "step": 272, + "time": 11.56 + }, + { + "epoch": 0.26, + "learning_rate": "1.0480e-04", + "loss": 1.0893, + "slid_loss": 1.1367, + "step": 273, + "time": 13.4 + }, + { + "epoch": 0.26, + "learning_rate": "1.0518e-04", + "loss": 1.0858, + "slid_loss": 1.1365, + "step": 274, + "time": 12.9 + }, + { + "epoch": 0.26, + "learning_rate": "1.0557e-04", + "loss": 1.1882, + "slid_loss": 1.1362, + "step": 275, + "time": 11.78 + }, + { + "epoch": 0.27, + 
"learning_rate": "1.0595e-04", + "loss": 1.1338, + "slid_loss": 1.1363, + "step": 276, + "time": 13.03 + }, + { + "epoch": 0.27, + "learning_rate": "1.0633e-04", + "loss": 1.0547, + "slid_loss": 1.1365, + "step": 277, + "time": 13.28 + }, + { + "epoch": 0.27, + "learning_rate": "1.0672e-04", + "loss": 1.0742, + "slid_loss": 1.1369, + "step": 278, + "time": 12.05 + }, + { + "epoch": 0.27, + "learning_rate": "1.0710e-04", + "loss": 1.0833, + "slid_loss": 1.1371, + "step": 279, + "time": 11.89 + }, + { + "epoch": 0.27, + "learning_rate": "1.0749e-04", + "loss": 1.0823, + "slid_loss": 1.136, + "step": 280, + "time": 13.4 + }, + { + "epoch": 0.27, + "learning_rate": "1.0787e-04", + "loss": 1.0255, + "slid_loss": 1.1347, + "step": 281, + "time": 11.85 + }, + { + "epoch": 0.27, + "learning_rate": "1.0825e-04", + "loss": 1.1314, + "slid_loss": 1.1358, + "step": 282, + "time": 12.19 + }, + { + "epoch": 0.27, + "learning_rate": "1.0864e-04", + "loss": 1.0214, + "slid_loss": 1.1349, + "step": 283, + "time": 12.35 + }, + { + "epoch": 0.27, + "learning_rate": "1.0902e-04", + "loss": 1.092, + "slid_loss": 1.1339, + "step": 284, + "time": 13.41 + }, + { + "epoch": 0.27, + "learning_rate": "1.0940e-04", + "loss": 1.1086, + "slid_loss": 1.1335, + "step": 285, + "time": 12.97 + }, + { + "epoch": 0.27, + "learning_rate": "1.0979e-04", + "loss": 1.1321, + "slid_loss": 1.1332, + "step": 286, + "time": 12.21 + }, + { + "epoch": 0.28, + "learning_rate": "1.1017e-04", + "loss": 1.1786, + "slid_loss": 1.1347, + "step": 287, + "time": 12.86 + }, + { + "epoch": 0.28, + "learning_rate": "1.1056e-04", + "loss": 1.0995, + "slid_loss": 1.1347, + "step": 288, + "time": 12.22 + }, + { + "epoch": 0.28, + "learning_rate": "1.1094e-04", + "loss": 1.1757, + "slid_loss": 1.1348, + "step": 289, + "time": 13.0 + }, + { + "epoch": 0.28, + "learning_rate": "1.1132e-04", + "loss": 1.1149, + "slid_loss": 1.1341, + "step": 290, + "time": 11.96 + }, + { + "epoch": 0.28, + "learning_rate": "1.1171e-04", + 
"loss": 1.0929, + "slid_loss": 1.1338, + "step": 291, + "time": 13.93 + }, + { + "epoch": 0.28, + "learning_rate": "1.1209e-04", + "loss": 1.2009, + "slid_loss": 1.1342, + "step": 292, + "time": 10.88 + }, + { + "epoch": 0.28, + "learning_rate": "1.1248e-04", + "loss": 1.2116, + "slid_loss": 1.1347, + "step": 293, + "time": 13.32 + }, + { + "epoch": 0.28, + "learning_rate": "1.1286e-04", + "loss": 1.1023, + "slid_loss": 1.1344, + "step": 294, + "time": 13.38 + }, + { + "epoch": 0.28, + "learning_rate": "1.1324e-04", + "loss": 1.0516, + "slid_loss": 1.1328, + "step": 295, + "time": 12.27 + }, + { + "epoch": 0.28, + "learning_rate": "1.1363e-04", + "loss": 1.0346, + "slid_loss": 1.1313, + "step": 296, + "time": 14.2 + }, + { + "epoch": 0.29, + "learning_rate": "1.1401e-04", + "loss": 1.0819, + "slid_loss": 1.1316, + "step": 297, + "time": 13.89 + }, + { + "epoch": 0.29, + "learning_rate": "1.1440e-04", + "loss": 1.0712, + "slid_loss": 1.1308, + "step": 298, + "time": 13.5 + }, + { + "epoch": 0.29, + "learning_rate": "1.1478e-04", + "loss": 1.1035, + "slid_loss": 1.1303, + "step": 299, + "time": 13.43 + }, + { + "epoch": 0.29, + "learning_rate": "1.1516e-04", + "loss": 1.1898, + "slid_loss": 1.1308, + "step": 300, + "time": 11.83 + }, + { + "epoch": 0.29, + "learning_rate": "1.1555e-04", + "loss": 1.0281, + "slid_loss": 1.1307, + "step": 301, + "time": 13.84 + }, + { + "epoch": 0.29, + "learning_rate": "1.1593e-04", + "loss": 0.9542, + "slid_loss": 1.1299, + "step": 302, + "time": 13.42 + }, + { + "epoch": 0.29, + "learning_rate": "1.1631e-04", + "loss": 1.1331, + "slid_loss": 1.1306, + "step": 303, + "time": 12.87 + }, + { + "epoch": 0.29, + "learning_rate": "1.1670e-04", + "loss": 1.105, + "slid_loss": 1.1308, + "step": 304, + "time": 14.05 + }, + { + "epoch": 0.29, + "learning_rate": "1.1708e-04", + "loss": 1.1077, + "slid_loss": 1.1298, + "step": 305, + "time": 13.58 + }, + { + "epoch": 0.29, + "learning_rate": "1.1747e-04", + "loss": 1.0667, + "slid_loss": 
1.1288, + "step": 306, + "time": 13.03 + }, + { + "epoch": 0.29, + "learning_rate": "1.1785e-04", + "loss": 1.1468, + "slid_loss": 1.128, + "step": 307, + "time": 12.5 + }, + { + "epoch": 0.3, + "learning_rate": "1.1823e-04", + "loss": 1.1221, + "slid_loss": 1.1285, + "step": 308, + "time": 11.72 + }, + { + "epoch": 0.3, + "learning_rate": "1.1862e-04", + "loss": 1.1155, + "slid_loss": 1.127, + "step": 309, + "time": 13.25 + }, + { + "epoch": 0.3, + "learning_rate": "1.1900e-04", + "loss": 1.1023, + "slid_loss": 1.1257, + "step": 310, + "time": 13.83 + }, + { + "epoch": 0.3, + "learning_rate": "1.1939e-04", + "loss": 1.0897, + "slid_loss": 1.1248, + "step": 311, + "time": 14.47 + }, + { + "epoch": 0.3, + "learning_rate": "1.1977e-04", + "loss": 1.2274, + "slid_loss": 1.1252, + "step": 312, + "time": 12.87 + }, + { + "epoch": 0.3, + "learning_rate": "1.2015e-04", + "loss": 1.0339, + "slid_loss": 1.1248, + "step": 313, + "time": 14.41 + }, + { + "epoch": 0.3, + "learning_rate": "1.2054e-04", + "loss": 1.0379, + "slid_loss": 1.1236, + "step": 314, + "time": 13.4 + }, + { + "epoch": 0.3, + "learning_rate": "1.2092e-04", + "loss": 1.1425, + "slid_loss": 1.1235, + "step": 315, + "time": 13.36 + }, + { + "epoch": 0.3, + "learning_rate": "1.2131e-04", + "loss": 1.1424, + "slid_loss": 1.1234, + "step": 316, + "time": 12.76 + }, + { + "epoch": 0.3, + "learning_rate": "1.2169e-04", + "loss": 1.0492, + "slid_loss": 1.1218, + "step": 317, + "time": 13.7 + }, + { + "epoch": 0.31, + "learning_rate": "1.2207e-04", + "loss": 1.0339, + "slid_loss": 1.1203, + "step": 318, + "time": 13.21 + }, + { + "epoch": 0.31, + "learning_rate": "1.2246e-04", + "loss": 1.0246, + "slid_loss": 1.1191, + "step": 319, + "time": 10.85 + }, + { + "epoch": 0.31, + "learning_rate": "1.2284e-04", + "loss": 1.1394, + "slid_loss": 1.1196, + "step": 320, + "time": 12.82 + }, + { + "epoch": 0.31, + "learning_rate": "1.2322e-04", + "loss": 1.0116, + "slid_loss": 1.1188, + "step": 321, + "time": 12.63 + }, + { + 
"epoch": 0.31, + "learning_rate": "1.2361e-04", + "loss": 1.0519, + "slid_loss": 1.1178, + "step": 322, + "time": 13.44 + }, + { + "epoch": 0.31, + "learning_rate": "1.2399e-04", + "loss": 1.1848, + "slid_loss": 1.117, + "step": 323, + "time": 13.94 + }, + { + "epoch": 0.31, + "learning_rate": "1.2438e-04", + "loss": 1.1344, + "slid_loss": 1.1185, + "step": 324, + "time": 13.44 + }, + { + "epoch": 0.31, + "learning_rate": "1.2476e-04", + "loss": 1.1106, + "slid_loss": 1.1171, + "step": 325, + "time": 13.4 + }, + { + "epoch": 0.31, + "learning_rate": "1.2514e-04", + "loss": 1.106, + "slid_loss": 1.1177, + "step": 326, + "time": 15.13 + }, + { + "epoch": 0.31, + "learning_rate": "1.2553e-04", + "loss": 1.0726, + "slid_loss": 1.1172, + "step": 327, + "time": 13.35 + }, + { + "epoch": 0.32, + "learning_rate": "1.2591e-04", + "loss": 1.0443, + "slid_loss": 1.1165, + "step": 328, + "time": 13.14 + }, + { + "epoch": 0.32, + "learning_rate": "1.2630e-04", + "loss": 1.2201, + "slid_loss": 1.1175, + "step": 329, + "time": 10.85 + }, + { + "epoch": 0.32, + "learning_rate": "1.2668e-04", + "loss": 0.9423, + "slid_loss": 1.1153, + "step": 330, + "time": 13.64 + }, + { + "epoch": 0.32, + "learning_rate": "1.2706e-04", + "loss": 1.124, + "slid_loss": 1.1154, + "step": 331, + "time": 13.0 + }, + { + "epoch": 0.32, + "learning_rate": "1.2745e-04", + "loss": 1.1948, + "slid_loss": 1.1161, + "step": 332, + "time": 14.18 + }, + { + "epoch": 0.32, + "learning_rate": "1.2783e-04", + "loss": 1.0845, + "slid_loss": 1.115, + "step": 333, + "time": 13.71 + }, + { + "epoch": 0.32, + "learning_rate": "1.2821e-04", + "loss": 1.0932, + "slid_loss": 1.1144, + "step": 334, + "time": 13.18 + }, + { + "epoch": 0.32, + "learning_rate": "1.2860e-04", + "loss": 1.1208, + "slid_loss": 1.1136, + "step": 335, + "time": 12.01 + }, + { + "epoch": 0.32, + "learning_rate": "1.2898e-04", + "loss": 1.2009, + "slid_loss": 1.1141, + "step": 336, + "time": 11.5 + }, + { + "epoch": 0.32, + "learning_rate": 
"1.2937e-04", + "loss": 1.0812, + "slid_loss": 1.113, + "step": 337, + "time": 13.75 + }, + { + "epoch": 0.32, + "learning_rate": "1.2975e-04", + "loss": 1.0424, + "slid_loss": 1.1114, + "step": 338, + "time": 12.27 + }, + { + "epoch": 0.33, + "learning_rate": "1.3013e-04", + "loss": 1.1397, + "slid_loss": 1.1113, + "step": 339, + "time": 13.57 + }, + { + "epoch": 0.33, + "learning_rate": "1.3052e-04", + "loss": 1.1018, + "slid_loss": 1.1107, + "step": 340, + "time": 14.13 + }, + { + "epoch": 0.33, + "learning_rate": "1.3090e-04", + "loss": 1.0822, + "slid_loss": 1.1097, + "step": 341, + "time": 14.58 + }, + { + "epoch": 0.33, + "learning_rate": "1.3129e-04", + "loss": 1.0471, + "slid_loss": 1.1084, + "step": 342, + "time": 13.32 + }, + { + "epoch": 0.33, + "learning_rate": "1.3167e-04", + "loss": 1.1075, + "slid_loss": 1.1077, + "step": 343, + "time": 14.26 + }, + { + "epoch": 0.33, + "learning_rate": "1.3205e-04", + "loss": 1.1103, + "slid_loss": 1.1078, + "step": 344, + "time": 10.91 + }, + { + "epoch": 0.33, + "learning_rate": "1.3244e-04", + "loss": 1.0956, + "slid_loss": 1.1072, + "step": 345, + "time": 13.25 + }, + { + "epoch": 0.33, + "learning_rate": "1.3282e-04", + "loss": 0.9834, + "slid_loss": 1.1056, + "step": 346, + "time": 11.59 + }, + { + "epoch": 0.33, + "learning_rate": "1.3321e-04", + "loss": 1.0889, + "slid_loss": 1.1068, + "step": 347, + "time": 12.24 + }, + { + "epoch": 0.33, + "learning_rate": "1.3359e-04", + "loss": 1.0538, + "slid_loss": 1.1067, + "step": 348, + "time": 12.18 + }, + { + "epoch": 0.34, + "learning_rate": "1.3397e-04", + "loss": 1.0294, + "slid_loss": 1.1048, + "step": 349, + "time": 14.03 + }, + { + "epoch": 0.34, + "learning_rate": "1.3436e-04", + "loss": 1.0403, + "slid_loss": 1.1038, + "step": 350, + "time": 13.37 + }, + { + "epoch": 0.34, + "learning_rate": "1.3474e-04", + "loss": 1.2006, + "slid_loss": 1.1058, + "step": 351, + "time": 13.67 + }, + { + "epoch": 0.34, + "learning_rate": "1.3512e-04", + "loss": 1.1609, + 
"slid_loss": 1.1055, + "step": 352, + "time": 13.38 + }, + { + "epoch": 0.34, + "learning_rate": "1.3551e-04", + "loss": 1.1069, + "slid_loss": 1.1056, + "step": 353, + "time": 13.69 + }, + { + "epoch": 0.34, + "learning_rate": "1.3589e-04", + "loss": 0.9822, + "slid_loss": 1.1036, + "step": 354, + "time": 14.02 + }, + { + "epoch": 0.34, + "learning_rate": "1.3628e-04", + "loss": 1.013, + "slid_loss": 1.1024, + "step": 355, + "time": 14.23 + }, + { + "epoch": 0.34, + "learning_rate": "1.3666e-04", + "loss": 1.1721, + "slid_loss": 1.1027, + "step": 356, + "time": 13.72 + }, + { + "epoch": 0.34, + "learning_rate": "1.3704e-04", + "loss": 1.0672, + "slid_loss": 1.1016, + "step": 357, + "time": 13.12 + }, + { + "epoch": 0.34, + "learning_rate": "1.3743e-04", + "loss": 1.0824, + "slid_loss": 1.0996, + "step": 358, + "time": 13.16 + }, + { + "epoch": 0.34, + "learning_rate": "1.3781e-04", + "loss": 1.1155, + "slid_loss": 1.0995, + "step": 359, + "time": 12.04 + }, + { + "epoch": 0.35, + "learning_rate": "1.3820e-04", + "loss": 1.0015, + "slid_loss": 1.0985, + "step": 360, + "time": 12.14 + }, + { + "epoch": 0.35, + "learning_rate": "1.3858e-04", + "loss": 1.0656, + "slid_loss": 1.0964, + "step": 361, + "time": 12.31 + }, + { + "epoch": 0.35, + "learning_rate": "1.3896e-04", + "loss": 1.2371, + "slid_loss": 1.0969, + "step": 362, + "time": 13.2 + }, + { + "epoch": 0.35, + "learning_rate": "1.3935e-04", + "loss": 1.138, + "slid_loss": 1.0968, + "step": 363, + "time": 13.75 + }, + { + "epoch": 0.35, + "learning_rate": "1.3973e-04", + "loss": 1.1079, + "slid_loss": 1.0963, + "step": 364, + "time": 11.31 + }, + { + "epoch": 0.35, + "learning_rate": "1.4012e-04", + "loss": 1.0055, + "slid_loss": 1.0958, + "step": 365, + "time": 11.94 + }, + { + "epoch": 0.35, + "learning_rate": "1.4050e-04", + "loss": 1.1014, + "slid_loss": 1.0959, + "step": 366, + "time": 12.24 + }, + { + "epoch": 0.35, + "learning_rate": "1.4088e-04", + "loss": 1.1014, + "slid_loss": 1.0957, + "step": 367, + 
"time": 13.54 + }, + { + "epoch": 0.35, + "learning_rate": "1.4127e-04", + "loss": 1.0626, + "slid_loss": 1.0955, + "step": 368, + "time": 13.78 + }, + { + "epoch": 0.35, + "learning_rate": "1.4165e-04", + "loss": 1.0423, + "slid_loss": 1.0956, + "step": 369, + "time": 12.82 + }, + { + "epoch": 0.36, + "learning_rate": "1.4203e-04", + "loss": 1.2084, + "slid_loss": 1.0975, + "step": 370, + "time": 12.17 + }, + { + "epoch": 0.36, + "learning_rate": "1.4242e-04", + "loss": 1.1612, + "slid_loss": 1.0982, + "step": 371, + "time": 11.88 + }, + { + "epoch": 0.36, + "learning_rate": "1.4280e-04", + "loss": 1.0613, + "slid_loss": 1.0971, + "step": 372, + "time": 15.27 + }, + { + "epoch": 0.36, + "learning_rate": "1.4319e-04", + "loss": 1.0918, + "slid_loss": 1.0971, + "step": 373, + "time": 13.38 + }, + { + "epoch": 0.36, + "learning_rate": "1.4357e-04", + "loss": 0.9901, + "slid_loss": 1.0962, + "step": 374, + "time": 14.06 + }, + { + "epoch": 0.36, + "learning_rate": "1.4395e-04", + "loss": 1.1319, + "slid_loss": 1.0956, + "step": 375, + "time": 12.56 + }, + { + "epoch": 0.36, + "learning_rate": "1.4434e-04", + "loss": 1.0916, + "slid_loss": 1.0952, + "step": 376, + "time": 13.19 + }, + { + "epoch": 0.36, + "learning_rate": "1.4472e-04", + "loss": 1.0855, + "slid_loss": 1.0955, + "step": 377, + "time": 13.2 + }, + { + "epoch": 0.36, + "learning_rate": "1.4511e-04", + "loss": 1.2371, + "slid_loss": 1.0971, + "step": 378, + "time": 13.73 + }, + { + "epoch": 0.36, + "learning_rate": "1.4549e-04", + "loss": 1.044, + "slid_loss": 1.0967, + "step": 379, + "time": 13.43 + }, + { + "epoch": 0.37, + "learning_rate": "1.4587e-04", + "loss": 1.1135, + "slid_loss": 1.097, + "step": 380, + "time": 12.89 + }, + { + "epoch": 0.37, + "learning_rate": "1.4626e-04", + "loss": 1.0429, + "slid_loss": 1.0972, + "step": 381, + "time": 11.21 + }, + { + "epoch": 0.37, + "learning_rate": "1.4664e-04", + "loss": 1.046, + "slid_loss": 1.0964, + "step": 382, + "time": 11.45 + }, + { + "epoch": 
0.37, + "learning_rate": "1.4702e-04", + "loss": 1.1237, + "slid_loss": 1.0974, + "step": 383, + "time": 11.13 + }, + { + "epoch": 0.37, + "learning_rate": "1.4741e-04", + "loss": 1.0212, + "slid_loss": 1.0967, + "step": 384, + "time": 12.17 + }, + { + "epoch": 0.37, + "learning_rate": "1.4779e-04", + "loss": 0.9437, + "slid_loss": 1.095, + "step": 385, + "time": 13.56 + }, + { + "epoch": 0.37, + "learning_rate": "1.4818e-04", + "loss": 1.0723, + "slid_loss": 1.0944, + "step": 386, + "time": 14.07 + }, + { + "epoch": 0.37, + "learning_rate": "1.4856e-04", + "loss": 1.0119, + "slid_loss": 1.0928, + "step": 387, + "time": 13.98 + }, + { + "epoch": 0.37, + "learning_rate": "1.4894e-04", + "loss": 1.1427, + "slid_loss": 1.0932, + "step": 388, + "time": 14.01 + }, + { + "epoch": 0.37, + "learning_rate": "1.4933e-04", + "loss": 1.0418, + "slid_loss": 1.0918, + "step": 389, + "time": 13.75 + }, + { + "epoch": 0.37, + "learning_rate": "1.4971e-04", + "loss": 1.0642, + "slid_loss": 1.0913, + "step": 390, + "time": 13.87 + }, + { + "epoch": 0.38, + "learning_rate": "1.5010e-04", + "loss": 1.0042, + "slid_loss": 1.0905, + "step": 391, + "time": 13.65 + }, + { + "epoch": 0.38, + "learning_rate": "1.5048e-04", + "loss": 0.9899, + "slid_loss": 1.0883, + "step": 392, + "time": 14.41 + }, + { + "epoch": 0.38, + "learning_rate": "1.5086e-04", + "loss": 1.0094, + "slid_loss": 1.0863, + "step": 393, + "time": 13.43 + }, + { + "epoch": 0.38, + "learning_rate": "1.5125e-04", + "loss": 1.178, + "slid_loss": 1.0871, + "step": 394, + "time": 13.18 + }, + { + "epoch": 0.38, + "learning_rate": "1.5163e-04", + "loss": 1.0861, + "slid_loss": 1.0874, + "step": 395, + "time": 13.12 + }, + { + "epoch": 0.38, + "learning_rate": "1.5202e-04", + "loss": 0.9624, + "slid_loss": 1.0867, + "step": 396, + "time": 12.6 + }, + { + "epoch": 0.38, + "learning_rate": "1.5240e-04", + "loss": 1.0635, + "slid_loss": 1.0865, + "step": 397, + "time": 13.9 + }, + { + "epoch": 0.38, + "learning_rate": "1.5278e-04", 
+ "loss": 1.0372, + "slid_loss": 1.0862, + "step": 398, + "time": 10.54 + }, + { + "epoch": 0.38, + "learning_rate": "1.5317e-04", + "loss": 1.0002, + "slid_loss": 1.0851, + "step": 399, + "time": 13.66 + }, + { + "epoch": 0.38, + "learning_rate": "1.5355e-04", + "loss": 1.063, + "slid_loss": 1.0839, + "step": 400, + "time": 12.87 + }, + { + "epoch": 0.39, + "learning_rate": "1.5393e-04", + "loss": 1.1754, + "slid_loss": 1.0853, + "step": 401, + "time": 12.83 + }, + { + "epoch": 0.39, + "learning_rate": "1.5432e-04", + "loss": 1.0598, + "slid_loss": 1.0864, + "step": 402, + "time": 13.15 + }, + { + "epoch": 0.39, + "learning_rate": "1.5470e-04", + "loss": 1.0806, + "slid_loss": 1.0859, + "step": 403, + "time": 13.68 + }, + { + "epoch": 0.39, + "learning_rate": "1.5509e-04", + "loss": 1.1851, + "slid_loss": 1.0867, + "step": 404, + "time": 14.14 + }, + { + "epoch": 0.39, + "learning_rate": "1.5547e-04", + "loss": 1.1202, + "slid_loss": 1.0868, + "step": 405, + "time": 13.46 + }, + { + "epoch": 0.39, + "learning_rate": "1.5585e-04", + "loss": 1.1635, + "slid_loss": 1.0878, + "step": 406, + "time": 12.14 + }, + { + "epoch": 0.39, + "learning_rate": "1.5624e-04", + "loss": 1.1358, + "slid_loss": 1.0877, + "step": 407, + "time": 13.34 + }, + { + "epoch": 0.39, + "learning_rate": "1.5662e-04", + "loss": 1.1069, + "slid_loss": 1.0875, + "step": 408, + "time": 11.44 + }, + { + "epoch": 0.39, + "learning_rate": "1.5701e-04", + "loss": 1.1895, + "slid_loss": 1.0883, + "step": 409, + "time": 14.11 + }, + { + "epoch": 0.39, + "learning_rate": "1.5739e-04", + "loss": 0.9738, + "slid_loss": 1.087, + "step": 410, + "time": 13.65 + }, + { + "epoch": 0.39, + "learning_rate": "1.5777e-04", + "loss": 1.1221, + "slid_loss": 1.0873, + "step": 411, + "time": 13.2 + }, + { + "epoch": 0.4, + "learning_rate": "1.5816e-04", + "loss": 1.1668, + "slid_loss": 1.0867, + "step": 412, + "time": 13.82 + }, + { + "epoch": 0.4, + "learning_rate": "1.5854e-04", + "loss": 1.1516, + "slid_loss": 
1.0879, + "step": 413, + "time": 11.91 + }, + { + "epoch": 0.4, + "learning_rate": "1.5893e-04", + "loss": 1.0854, + "slid_loss": 1.0883, + "step": 414, + "time": 13.41 + }, + { + "epoch": 0.4, + "learning_rate": "1.5931e-04", + "loss": 1.0152, + "slid_loss": 1.0871, + "step": 415, + "time": 13.07 + }, + { + "epoch": 0.4, + "learning_rate": "1.5969e-04", + "loss": 1.1072, + "slid_loss": 1.0867, + "step": 416, + "time": 12.04 + }, + { + "epoch": 0.4, + "learning_rate": "1.6008e-04", + "loss": 1.035, + "slid_loss": 1.0866, + "step": 417, + "time": 13.89 + }, + { + "epoch": 0.4, + "learning_rate": "1.6046e-04", + "loss": 1.1327, + "slid_loss": 1.0876, + "step": 418, + "time": 12.61 + }, + { + "epoch": 0.4, + "learning_rate": "1.6084e-04", + "loss": 0.9402, + "slid_loss": 1.0867, + "step": 419, + "time": 10.8 + }, + { + "epoch": 0.4, + "learning_rate": "1.6123e-04", + "loss": 1.1575, + "slid_loss": 1.0869, + "step": 420, + "time": 13.08 + }, + { + "epoch": 0.4, + "learning_rate": "1.6161e-04", + "loss": 1.0064, + "slid_loss": 1.0868, + "step": 421, + "time": 12.68 + }, + { + "epoch": 0.41, + "learning_rate": "1.6200e-04", + "loss": 1.1238, + "slid_loss": 1.0876, + "step": 422, + "time": 13.5 + }, + { + "epoch": 0.41, + "learning_rate": "1.6238e-04", + "loss": 1.0533, + "slid_loss": 1.0862, + "step": 423, + "time": 12.94 + }, + { + "epoch": 0.41, + "learning_rate": "1.6276e-04", + "loss": 1.1043, + "slid_loss": 1.0859, + "step": 424, + "time": 11.9 + }, + { + "epoch": 0.41, + "learning_rate": "1.6315e-04", + "loss": 1.0688, + "slid_loss": 1.0855, + "step": 425, + "time": 12.98 + }, + { + "epoch": 0.41, + "learning_rate": "1.6353e-04", + "loss": 1.0692, + "slid_loss": 1.0852, + "step": 426, + "time": 13.77 + }, + { + "epoch": 0.41, + "learning_rate": "1.6392e-04", + "loss": 1.0817, + "slid_loss": 1.0853, + "step": 427, + "time": 12.28 + }, + { + "epoch": 0.41, + "learning_rate": "1.6430e-04", + "loss": 0.9885, + "slid_loss": 1.0847, + "step": 428, + "time": 13.05 + }, + 
{ + "epoch": 0.41, + "learning_rate": "1.6468e-04", + "loss": 1.0434, + "slid_loss": 1.0829, + "step": 429, + "time": 12.83 + }, + { + "epoch": 0.41, + "learning_rate": "1.6507e-04", + "loss": 1.1381, + "slid_loss": 1.0849, + "step": 430, + "time": 13.82 + }, + { + "epoch": 0.41, + "learning_rate": "1.6545e-04", + "loss": 0.9673, + "slid_loss": 1.0833, + "step": 431, + "time": 13.65 + }, + { + "epoch": 0.41, + "learning_rate": "1.6583e-04", + "loss": 1.1323, + "slid_loss": 1.0827, + "step": 432, + "time": 13.27 + }, + { + "epoch": 0.42, + "learning_rate": "1.6622e-04", + "loss": 1.0484, + "slid_loss": 1.0823, + "step": 433, + "time": 14.47 + }, + { + "epoch": 0.42, + "learning_rate": "1.6660e-04", + "loss": 1.0162, + "slid_loss": 1.0816, + "step": 434, + "time": 10.84 + }, + { + "epoch": 0.42, + "learning_rate": "1.6699e-04", + "loss": 1.0345, + "slid_loss": 1.0807, + "step": 435, + "time": 11.46 + }, + { + "epoch": 0.42, + "learning_rate": "1.6737e-04", + "loss": 1.0991, + "slid_loss": 1.0797, + "step": 436, + "time": 12.81 + }, + { + "epoch": 0.42, + "learning_rate": "1.6775e-04", + "loss": 1.1863, + "slid_loss": 1.0807, + "step": 437, + "time": 13.83 + }, + { + "epoch": 0.42, + "learning_rate": "1.6814e-04", + "loss": 1.184, + "slid_loss": 1.0821, + "step": 438, + "time": 12.91 + }, + { + "epoch": 0.42, + "learning_rate": "1.6852e-04", + "loss": 0.9225, + "slid_loss": 1.08, + "step": 439, + "time": 13.16 + }, + { + "epoch": 0.42, + "learning_rate": "1.6891e-04", + "loss": 1.0758, + "slid_loss": 1.0797, + "step": 440, + "time": 13.3 + }, + { + "epoch": 0.42, + "learning_rate": "1.6929e-04", + "loss": 1.0419, + "slid_loss": 1.0793, + "step": 441, + "time": 13.85 + }, + { + "epoch": 0.42, + "learning_rate": "1.6967e-04", + "loss": 0.9558, + "slid_loss": 1.0784, + "step": 442, + "time": 12.44 + }, + { + "epoch": 0.43, + "learning_rate": "1.7006e-04", + "loss": 1.0148, + "slid_loss": 1.0775, + "step": 443, + "time": 12.38 + }, + { + "epoch": 0.43, + "learning_rate": 
"1.7044e-04", + "loss": 1.1866, + "slid_loss": 1.0782, + "step": 444, + "time": 11.37 + }, + { + "epoch": 0.43, + "learning_rate": "1.7083e-04", + "loss": 1.0033, + "slid_loss": 1.0773, + "step": 445, + "time": 13.22 + }, + { + "epoch": 0.43, + "learning_rate": "1.7121e-04", + "loss": 1.1825, + "slid_loss": 1.0793, + "step": 446, + "time": 11.81 + }, + { + "epoch": 0.43, + "learning_rate": "1.7159e-04", + "loss": 1.0948, + "slid_loss": 1.0794, + "step": 447, + "time": 11.52 + }, + { + "epoch": 0.43, + "learning_rate": "1.7198e-04", + "loss": 1.1319, + "slid_loss": 1.0801, + "step": 448, + "time": 13.96 + }, + { + "epoch": 0.43, + "learning_rate": "1.7236e-04", + "loss": 1.0548, + "slid_loss": 1.0804, + "step": 449, + "time": 13.66 + }, + { + "epoch": 0.43, + "learning_rate": "1.7274e-04", + "loss": 1.0546, + "slid_loss": 1.0805, + "step": 450, + "time": 11.64 + }, + { + "epoch": 0.43, + "learning_rate": "1.7313e-04", + "loss": 1.1605, + "slid_loss": 1.0801, + "step": 451, + "time": 13.41 + }, + { + "epoch": 0.43, + "learning_rate": "1.7351e-04", + "loss": 1.1163, + "slid_loss": 1.0797, + "step": 452, + "time": 12.7 + }, + { + "epoch": 0.44, + "learning_rate": "1.7390e-04", + "loss": 1.0139, + "slid_loss": 1.0788, + "step": 453, + "time": 12.74 + }, + { + "epoch": 0.44, + "learning_rate": "1.7428e-04", + "loss": 1.0758, + "slid_loss": 1.0797, + "step": 454, + "time": 13.4 + }, + { + "epoch": 0.44, + "learning_rate": "1.7466e-04", + "loss": 1.1292, + "slid_loss": 1.0809, + "step": 455, + "time": 13.01 + }, + { + "epoch": 0.44, + "learning_rate": "1.7505e-04", + "loss": 0.9871, + "slid_loss": 1.079, + "step": 456, + "time": 12.94 + }, + { + "epoch": 0.44, + "learning_rate": "1.7543e-04", + "loss": 1.0647, + "slid_loss": 1.079, + "step": 457, + "time": 13.35 + }, + { + "epoch": 0.44, + "learning_rate": "1.7582e-04", + "loss": 1.1903, + "slid_loss": 1.0801, + "step": 458, + "time": 12.78 + }, + { + "epoch": 0.44, + "learning_rate": "1.7620e-04", + "loss": 0.982, + 
"slid_loss": 1.0787, + "step": 459, + "time": 12.75 + }, + { + "epoch": 0.44, + "learning_rate": "1.7658e-04", + "loss": 1.1235, + "slid_loss": 1.08, + "step": 460, + "time": 13.4 + }, + { + "epoch": 0.44, + "learning_rate": "1.7697e-04", + "loss": 1.0177, + "slid_loss": 1.0795, + "step": 461, + "time": 13.67 + }, + { + "epoch": 0.44, + "learning_rate": "1.7735e-04", + "loss": 1.0484, + "slid_loss": 1.0776, + "step": 462, + "time": 13.0 + }, + { + "epoch": 0.44, + "learning_rate": "1.7774e-04", + "loss": 1.0731, + "slid_loss": 1.0769, + "step": 463, + "time": 14.16 + }, + { + "epoch": 0.45, + "learning_rate": "1.7812e-04", + "loss": 1.1799, + "slid_loss": 1.0777, + "step": 464, + "time": 13.77 + }, + { + "epoch": 0.45, + "learning_rate": "1.7850e-04", + "loss": 0.997, + "slid_loss": 1.0776, + "step": 465, + "time": 12.27 + }, + { + "epoch": 0.45, + "learning_rate": "1.7889e-04", + "loss": 1.2199, + "slid_loss": 1.0788, + "step": 466, + "time": 13.43 + }, + { + "epoch": 0.45, + "learning_rate": "1.7927e-04", + "loss": 1.0571, + "slid_loss": 1.0783, + "step": 467, + "time": 13.76 + }, + { + "epoch": 0.45, + "learning_rate": "1.7965e-04", + "loss": 1.006, + "slid_loss": 1.0777, + "step": 468, + "time": 14.02 + }, + { + "epoch": 0.45, + "learning_rate": "1.8004e-04", + "loss": 1.1346, + "slid_loss": 1.0787, + "step": 469, + "time": 11.23 + }, + { + "epoch": 0.45, + "learning_rate": "1.8042e-04", + "loss": 1.0168, + "slid_loss": 1.0768, + "step": 470, + "time": 11.57 + }, + { + "epoch": 0.45, + "learning_rate": "1.8081e-04", + "loss": 1.061, + "slid_loss": 1.0758, + "step": 471, + "time": 13.29 + }, + { + "epoch": 0.45, + "learning_rate": "1.8119e-04", + "loss": 1.1233, + "slid_loss": 1.0764, + "step": 472, + "time": 13.76 + }, + { + "epoch": 0.45, + "learning_rate": "1.8157e-04", + "loss": 1.1175, + "slid_loss": 1.0766, + "step": 473, + "time": 12.83 + }, + { + "epoch": 0.46, + "learning_rate": "1.8196e-04", + "loss": 1.074, + "slid_loss": 1.0775, + "step": 474, + 
"time": 11.96 + }, + { + "epoch": 0.46, + "learning_rate": "1.8234e-04", + "loss": 0.9853, + "slid_loss": 1.076, + "step": 475, + "time": 14.03 + }, + { + "epoch": 0.46, + "learning_rate": "1.8273e-04", + "loss": 1.0981, + "slid_loss": 1.0761, + "step": 476, + "time": 14.62 + }, + { + "epoch": 0.46, + "learning_rate": "1.8311e-04", + "loss": 1.0868, + "slid_loss": 1.0761, + "step": 477, + "time": 13.67 + }, + { + "epoch": 0.46, + "learning_rate": "1.8349e-04", + "loss": 1.0221, + "slid_loss": 1.0739, + "step": 478, + "time": 13.36 + }, + { + "epoch": 0.46, + "learning_rate": "1.8388e-04", + "loss": 1.1662, + "slid_loss": 1.0751, + "step": 479, + "time": 11.83 + }, + { + "epoch": 0.46, + "learning_rate": "1.8426e-04", + "loss": 1.0696, + "slid_loss": 1.0747, + "step": 480, + "time": 14.11 + }, + { + "epoch": 0.46, + "learning_rate": "1.8464e-04", + "loss": 0.9884, + "slid_loss": 1.0742, + "step": 481, + "time": 12.88 + }, + { + "epoch": 0.46, + "learning_rate": "1.8503e-04", + "loss": 1.0783, + "slid_loss": 1.0745, + "step": 482, + "time": 11.91 + }, + { + "epoch": 0.46, + "learning_rate": "1.8541e-04", + "loss": 1.0157, + "slid_loss": 1.0734, + "step": 483, + "time": 13.19 + }, + { + "epoch": 0.46, + "learning_rate": "1.8580e-04", + "loss": 1.1623, + "slid_loss": 1.0748, + "step": 484, + "time": 14.2 + }, + { + "epoch": 0.47, + "learning_rate": "1.8618e-04", + "loss": 1.0329, + "slid_loss": 1.0757, + "step": 485, + "time": 12.78 + }, + { + "epoch": 0.47, + "learning_rate": "1.8656e-04", + "loss": 1.0709, + "slid_loss": 1.0757, + "step": 486, + "time": 12.53 + }, + { + "epoch": 0.47, + "learning_rate": "1.8695e-04", + "loss": 1.0532, + "slid_loss": 1.0761, + "step": 487, + "time": 13.2 + }, + { + "epoch": 0.47, + "learning_rate": "1.8733e-04", + "loss": 1.0888, + "slid_loss": 1.0756, + "step": 488, + "time": 13.27 + }, + { + "epoch": 0.47, + "learning_rate": "1.8772e-04", + "loss": 0.9069, + "slid_loss": 1.0742, + "step": 489, + "time": 14.26 + }, + { + "epoch": 
0.47, + "learning_rate": "1.8810e-04", + "loss": 0.9678, + "slid_loss": 1.0733, + "step": 490, + "time": 13.49 + }, + { + "epoch": 0.47, + "learning_rate": "1.8848e-04", + "loss": 0.9874, + "slid_loss": 1.0731, + "step": 491, + "time": 11.78 + }, + { + "epoch": 0.47, + "learning_rate": "1.8887e-04", + "loss": 1.1192, + "slid_loss": 1.0744, + "step": 492, + "time": 13.67 + }, + { + "epoch": 0.47, + "learning_rate": "1.8925e-04", + "loss": 1.0061, + "slid_loss": 1.0743, + "step": 493, + "time": 12.87 + }, + { + "epoch": 0.47, + "learning_rate": "1.8964e-04", + "loss": 0.9751, + "slid_loss": 1.0723, + "step": 494, + "time": 12.87 + }, + { + "epoch": 0.48, + "learning_rate": "1.9002e-04", + "loss": 1.1274, + "slid_loss": 1.0727, + "step": 495, + "time": 13.94 + }, + { + "epoch": 0.48, + "learning_rate": "1.9040e-04", + "loss": 1.0814, + "slid_loss": 1.0739, + "step": 496, + "time": 13.4 + }, + { + "epoch": 0.48, + "learning_rate": "1.9079e-04", + "loss": 1.0962, + "slid_loss": 1.0743, + "step": 497, + "time": 13.36 + }, + { + "epoch": 0.48, + "learning_rate": "1.9117e-04", + "loss": 1.117, + "slid_loss": 1.075, + "step": 498, + "time": 13.34 + }, + { + "epoch": 0.48, + "learning_rate": "1.9155e-04", + "loss": 1.0042, + "slid_loss": 1.0751, + "step": 499, + "time": 12.48 + }, + { + "epoch": 0.48, + "learning_rate": "1.9194e-04", + "loss": 0.9939, + "slid_loss": 1.0744, + "step": 500, + "time": 12.04 + }, + { + "epoch": 0.48, + "learning_rate": "1.9232e-04", + "loss": 0.9867, + "slid_loss": 1.0725, + "step": 501, + "time": 12.97 + }, + { + "epoch": 0.48, + "learning_rate": "1.9271e-04", + "loss": 1.053, + "slid_loss": 1.0724, + "step": 502, + "time": 13.34 + }, + { + "epoch": 0.48, + "learning_rate": "1.9309e-04", + "loss": 1.1517, + "slid_loss": 1.0732, + "step": 503, + "time": 11.25 + }, + { + "epoch": 0.48, + "learning_rate": "1.9347e-04", + "loss": 1.1175, + "slid_loss": 1.0725, + "step": 504, + "time": 13.36 + }, + { + "epoch": 0.49, + "learning_rate": "1.9386e-04", 
+ "loss": 0.9554, + "slid_loss": 1.0708, + "step": 505, + "time": 10.67 + }, + { + "epoch": 0.49, + "learning_rate": "1.9424e-04", + "loss": 1.0071, + "slid_loss": 1.0693, + "step": 506, + "time": 11.57 + }, + { + "epoch": 0.49, + "learning_rate": "1.9463e-04", + "loss": 1.0807, + "slid_loss": 1.0687, + "step": 507, + "time": 12.99 + }, + { + "epoch": 0.49, + "learning_rate": "1.9501e-04", + "loss": 1.1357, + "slid_loss": 1.069, + "step": 508, + "time": 13.84 + }, + { + "epoch": 0.49, + "learning_rate": "1.9539e-04", + "loss": 1.1089, + "slid_loss": 1.0682, + "step": 509, + "time": 13.2 + }, + { + "epoch": 0.49, + "learning_rate": "1.9578e-04", + "loss": 1.123, + "slid_loss": 1.0697, + "step": 510, + "time": 14.04 + }, + { + "epoch": 0.49, + "learning_rate": "1.9616e-04", + "loss": 1.018, + "slid_loss": 1.0686, + "step": 511, + "time": 14.48 + }, + { + "epoch": 0.49, + "learning_rate": "1.9655e-04", + "loss": 1.0768, + "slid_loss": 1.0677, + "step": 512, + "time": 12.95 + }, + { + "epoch": 0.49, + "learning_rate": "1.9693e-04", + "loss": 1.1262, + "slid_loss": 1.0675, + "step": 513, + "time": 13.41 + }, + { + "epoch": 0.49, + "learning_rate": "1.9731e-04", + "loss": 1.1045, + "slid_loss": 1.0677, + "step": 514, + "time": 12.71 + }, + { + "epoch": 0.49, + "learning_rate": "1.9770e-04", + "loss": 1.1387, + "slid_loss": 1.0689, + "step": 515, + "time": 12.43 + }, + { + "epoch": 0.5, + "learning_rate": "1.9808e-04", + "loss": 1.0626, + "slid_loss": 1.0685, + "step": 516, + "time": 14.1 + }, + { + "epoch": 0.5, + "learning_rate": "1.9846e-04", + "loss": 1.0563, + "slid_loss": 1.0687, + "step": 517, + "time": 13.01 + }, + { + "epoch": 0.5, + "learning_rate": "1.9885e-04", + "loss": 1.0713, + "slid_loss": 1.0681, + "step": 518, + "time": 12.31 + }, + { + "epoch": 0.5, + "learning_rate": "1.9923e-04", + "loss": 1.1455, + "slid_loss": 1.0701, + "step": 519, + "time": 12.91 + }, + { + "epoch": 0.5, + "learning_rate": "1.9962e-04", + "loss": 1.0585, + "slid_loss": 1.0691, + 
"step": 520, + "time": 13.81 + }, + { + "epoch": 0.5, + "learning_rate": "2.0000e-04", + "loss": 0.9356, + "slid_loss": 1.0684, + "step": 521, + "time": 13.82 + }, + { + "epoch": 0.5, + "learning_rate": "2.0000e-04", + "loss": 1.0925, + "slid_loss": 1.0681, + "step": 522, + "time": 13.31 + }, + { + "epoch": 0.5, + "learning_rate": "2.0000e-04", + "loss": 1.1168, + "slid_loss": 1.0688, + "step": 523, + "time": 13.47 + }, + { + "epoch": 0.5, + "learning_rate": "2.0000e-04", + "loss": 1.1304, + "slid_loss": 1.069, + "step": 524, + "time": 13.76 + }, + { + "epoch": 0.5, + "learning_rate": "2.0000e-04", + "loss": 1.02, + "slid_loss": 1.0685, + "step": 525, + "time": 11.38 + }, + { + "epoch": 0.51, + "learning_rate": "2.0000e-04", + "loss": 1.1233, + "slid_loss": 1.0691, + "step": 526, + "time": 13.95 + }, + { + "epoch": 0.51, + "learning_rate": "2.0000e-04", + "loss": 1.179, + "slid_loss": 1.07, + "step": 527, + "time": 13.7 + }, + { + "epoch": 0.51, + "learning_rate": "2.0000e-04", + "loss": 1.0579, + "slid_loss": 1.0707, + "step": 528, + "time": 12.8 + }, + { + "epoch": 0.51, + "learning_rate": "2.0000e-04", + "loss": 1.0605, + "slid_loss": 1.0709, + "step": 529, + "time": 13.56 + }, + { + "epoch": 0.51, + "learning_rate": "2.0000e-04", + "loss": 1.1178, + "slid_loss": 1.0707, + "step": 530, + "time": 13.9 + }, + { + "epoch": 0.51, + "learning_rate": "2.0000e-04", + "loss": 1.0711, + "slid_loss": 1.0717, + "step": 531, + "time": 12.17 + }, + { + "epoch": 0.51, + "learning_rate": "2.0000e-04", + "loss": 0.9443, + "slid_loss": 1.0699, + "step": 532, + "time": 11.7 + }, + { + "epoch": 0.51, + "learning_rate": "2.0000e-04", + "loss": 0.9551, + "slid_loss": 1.0689, + "step": 533, + "time": 13.55 + }, + { + "epoch": 0.51, + "learning_rate": "2.0000e-04", + "loss": 1.0341, + "slid_loss": 1.0691, + "step": 534, + "time": 11.13 + }, + { + "epoch": 0.51, + "learning_rate": "2.0000e-04", + "loss": 1.0816, + "slid_loss": 1.0696, + "step": 535, + "time": 12.56 + }, + { + "epoch": 
0.51, + "learning_rate": "2.0000e-04", + "loss": 1.0912, + "slid_loss": 1.0695, + "step": 536, + "time": 13.71 + }, + { + "epoch": 0.52, + "learning_rate": "2.0000e-04", + "loss": 1.0296, + "slid_loss": 1.0679, + "step": 537, + "time": 14.05 + }, + { + "epoch": 0.52, + "learning_rate": "2.0000e-04", + "loss": 1.0475, + "slid_loss": 1.0666, + "step": 538, + "time": 10.59 + }, + { + "epoch": 0.52, + "learning_rate": "2.0000e-04", + "loss": 0.9315, + "slid_loss": 1.0667, + "step": 539, + "time": 13.78 + }, + { + "epoch": 0.52, + "learning_rate": "2.0000e-04", + "loss": 1.018, + "slid_loss": 1.0661, + "step": 540, + "time": 14.04 + }, + { + "epoch": 0.52, + "learning_rate": "2.0000e-04", + "loss": 1.0196, + "slid_loss": 1.0659, + "step": 541, + "time": 12.37 + }, + { + "epoch": 0.52, + "learning_rate": "2.0000e-04", + "loss": 1.0688, + "slid_loss": 1.067, + "step": 542, + "time": 14.49 + }, + { + "epoch": 0.52, + "learning_rate": "1.9999e-04", + "loss": 1.1505, + "slid_loss": 1.0683, + "step": 543, + "time": 14.0 + }, + { + "epoch": 0.52, + "learning_rate": "1.9999e-04", + "loss": 1.1506, + "slid_loss": 1.068, + "step": 544, + "time": 13.73 + }, + { + "epoch": 0.52, + "learning_rate": "1.9999e-04", + "loss": 1.199, + "slid_loss": 1.0699, + "step": 545, + "time": 12.84 + }, + { + "epoch": 0.52, + "learning_rate": "1.9999e-04", + "loss": 1.0458, + "slid_loss": 1.0686, + "step": 546, + "time": 12.29 + }, + { + "epoch": 0.53, + "learning_rate": "1.9999e-04", + "loss": 0.9727, + "slid_loss": 1.0674, + "step": 547, + "time": 14.12 + }, + { + "epoch": 0.53, + "learning_rate": "1.9999e-04", + "loss": 1.1303, + "slid_loss": 1.0673, + "step": 548, + "time": 12.39 + }, + { + "epoch": 0.53, + "learning_rate": "1.9999e-04", + "loss": 1.0758, + "slid_loss": 1.0675, + "step": 549, + "time": 11.91 + }, + { + "epoch": 0.53, + "learning_rate": "1.9999e-04", + "loss": 1.0393, + "slid_loss": 1.0674, + "step": 550, + "time": 12.28 + }, + { + "epoch": 0.53, + "learning_rate": "1.9999e-04", 
+ "loss": 1.1124, + "slid_loss": 1.0669, + "step": 551, + "time": 13.45 + }, + { + "epoch": 0.53, + "learning_rate": "1.9999e-04", + "loss": 0.9769, + "slid_loss": 1.0655, + "step": 552, + "time": 13.26 + }, + { + "epoch": 0.53, + "learning_rate": "1.9999e-04", + "loss": 1.0826, + "slid_loss": 1.0662, + "step": 553, + "time": 13.78 + }, + { + "epoch": 0.53, + "learning_rate": "1.9999e-04", + "loss": 1.0468, + "slid_loss": 1.0659, + "step": 554, + "time": 14.07 + }, + { + "epoch": 0.53, + "learning_rate": "1.9999e-04", + "loss": 1.1096, + "slid_loss": 1.0657, + "step": 555, + "time": 11.69 + }, + { + "epoch": 0.53, + "learning_rate": "1.9999e-04", + "loss": 1.0225, + "slid_loss": 1.0661, + "step": 556, + "time": 13.78 + }, + { + "epoch": 0.54, + "learning_rate": "1.9999e-04", + "loss": 1.1107, + "slid_loss": 1.0665, + "step": 557, + "time": 13.5 + }, + { + "epoch": 0.54, + "learning_rate": "1.9998e-04", + "loss": 1.0891, + "slid_loss": 1.0655, + "step": 558, + "time": 12.46 + }, + { + "epoch": 0.54, + "learning_rate": "1.9998e-04", + "loss": 1.0006, + "slid_loss": 1.0657, + "step": 559, + "time": 13.51 + }, + { + "epoch": 0.54, + "learning_rate": "1.9998e-04", + "loss": 1.0981, + "slid_loss": 1.0655, + "step": 560, + "time": 11.66 + }, + { + "epoch": 0.54, + "learning_rate": "1.9998e-04", + "loss": 1.0809, + "slid_loss": 1.0661, + "step": 561, + "time": 11.55 + }, + { + "epoch": 0.54, + "learning_rate": "1.9998e-04", + "loss": 1.0517, + "slid_loss": 1.0661, + "step": 562, + "time": 13.77 + }, + { + "epoch": 0.54, + "learning_rate": "1.9998e-04", + "loss": 0.9225, + "slid_loss": 1.0646, + "step": 563, + "time": 13.75 + }, + { + "epoch": 0.54, + "learning_rate": "1.9998e-04", + "loss": 1.0561, + "slid_loss": 1.0634, + "step": 564, + "time": 12.71 + }, + { + "epoch": 0.54, + "learning_rate": "1.9998e-04", + "loss": 1.103, + "slid_loss": 1.0644, + "step": 565, + "time": 13.82 + }, + { + "epoch": 0.54, + "learning_rate": "1.9998e-04", + "loss": 0.9465, + "slid_loss": 
1.0617, + "step": 566, + "time": 13.9 + }, + { + "epoch": 0.54, + "learning_rate": "1.9998e-04", + "loss": 0.9362, + "slid_loss": 1.0605, + "step": 567, + "time": 12.74 + }, + { + "epoch": 0.55, + "learning_rate": "1.9998e-04", + "loss": 1.0916, + "slid_loss": 1.0613, + "step": 568, + "time": 13.12 + }, + { + "epoch": 0.55, + "learning_rate": "1.9997e-04", + "loss": 1.073, + "slid_loss": 1.0607, + "step": 569, + "time": 13.44 + }, + { + "epoch": 0.55, + "learning_rate": "1.9997e-04", + "loss": 0.9971, + "slid_loss": 1.0605, + "step": 570, + "time": 13.47 + }, + { + "epoch": 0.55, + "learning_rate": "1.9997e-04", + "loss": 1.0541, + "slid_loss": 1.0605, + "step": 571, + "time": 13.37 + }, + { + "epoch": 0.55, + "learning_rate": "1.9997e-04", + "loss": 1.0016, + "slid_loss": 1.0592, + "step": 572, + "time": 13.34 + }, + { + "epoch": 0.55, + "learning_rate": "1.9997e-04", + "loss": 1.0541, + "slid_loss": 1.0586, + "step": 573, + "time": 12.87 + }, + { + "epoch": 0.55, + "learning_rate": "1.9997e-04", + "loss": 0.962, + "slid_loss": 1.0575, + "step": 574, + "time": 13.98 + }, + { + "epoch": 0.55, + "learning_rate": "1.9997e-04", + "loss": 0.9362, + "slid_loss": 1.057, + "step": 575, + "time": 13.67 + }, + { + "epoch": 0.55, + "learning_rate": "1.9997e-04", + "loss": 0.9888, + "slid_loss": 1.0559, + "step": 576, + "time": 13.43 + }, + { + "epoch": 0.55, + "learning_rate": "1.9996e-04", + "loss": 1.0207, + "slid_loss": 1.0553, + "step": 577, + "time": 12.7 + }, + { + "epoch": 0.56, + "learning_rate": "1.9996e-04", + "loss": 1.1892, + "slid_loss": 1.0569, + "step": 578, + "time": 13.65 + }, + { + "epoch": 0.56, + "learning_rate": "1.9996e-04", + "loss": 1.0915, + "slid_loss": 1.0562, + "step": 579, + "time": 13.33 + }, + { + "epoch": 0.56, + "learning_rate": "1.9996e-04", + "loss": 0.9794, + "slid_loss": 1.0553, + "step": 580, + "time": 13.23 + }, + { + "epoch": 0.56, + "learning_rate": "1.9996e-04", + "loss": 1.0321, + "slid_loss": 1.0557, + "step": 581, + "time": 13.89 
+ }, + { + "epoch": 0.56, + "learning_rate": "1.9996e-04", + "loss": 1.0096, + "slid_loss": 1.055, + "step": 582, + "time": 12.89 + }, + { + "epoch": 0.56, + "learning_rate": "1.9996e-04", + "loss": 1.1575, + "slid_loss": 1.0564, + "step": 583, + "time": 13.83 + }, + { + "epoch": 0.56, + "learning_rate": "1.9996e-04", + "loss": 1.1859, + "slid_loss": 1.0567, + "step": 584, + "time": 13.31 + }, + { + "epoch": 0.56, + "learning_rate": "1.9995e-04", + "loss": 0.9553, + "slid_loss": 1.0559, + "step": 585, + "time": 14.0 + }, + { + "epoch": 0.56, + "learning_rate": "1.9995e-04", + "loss": 0.9196, + "slid_loss": 1.0544, + "step": 586, + "time": 12.04 + }, + { + "epoch": 0.56, + "learning_rate": "1.9995e-04", + "loss": 1.0118, + "slid_loss": 1.054, + "step": 587, + "time": 12.94 + }, + { + "epoch": 0.56, + "learning_rate": "1.9995e-04", + "loss": 1.0194, + "slid_loss": 1.0533, + "step": 588, + "time": 13.22 + }, + { + "epoch": 0.57, + "learning_rate": "1.9995e-04", + "loss": 1.0106, + "slid_loss": 1.0543, + "step": 589, + "time": 12.11 + }, + { + "epoch": 0.57, + "learning_rate": "1.9995e-04", + "loss": 0.9009, + "slid_loss": 1.0536, + "step": 590, + "time": 13.16 + }, + { + "epoch": 0.57, + "learning_rate": "1.9994e-04", + "loss": 0.9611, + "slid_loss": 1.0534, + "step": 591, + "time": 12.29 + }, + { + "epoch": 0.57, + "learning_rate": "1.9994e-04", + "loss": 0.9321, + "slid_loss": 1.0515, + "step": 592, + "time": 13.22 + }, + { + "epoch": 0.57, + "learning_rate": "1.9994e-04", + "loss": 0.9615, + "slid_loss": 1.0511, + "step": 593, + "time": 14.84 + }, + { + "epoch": 0.57, + "learning_rate": "1.9994e-04", + "loss": 0.9307, + "slid_loss": 1.0506, + "step": 594, + "time": 11.31 + }, + { + "epoch": 0.57, + "learning_rate": "1.9994e-04", + "loss": 1.1413, + "slid_loss": 1.0508, + "step": 595, + "time": 13.46 + }, + { + "epoch": 0.57, + "learning_rate": "1.9994e-04", + "loss": 1.0796, + "slid_loss": 1.0507, + "step": 596, + "time": 12.75 + }, + { + "epoch": 0.57, + 
"learning_rate": "1.9994e-04", + "loss": 0.9417, + "slid_loss": 1.0492, + "step": 597, + "time": 13.29 + }, + { + "epoch": 0.57, + "learning_rate": "1.9993e-04", + "loss": 1.0056, + "slid_loss": 1.0481, + "step": 598, + "time": 13.73 + }, + { + "epoch": 0.58, + "learning_rate": "1.9993e-04", + "loss": 1.0508, + "slid_loss": 1.0486, + "step": 599, + "time": 13.25 + }, + { + "epoch": 0.58, + "learning_rate": "1.9993e-04", + "loss": 1.0532, + "slid_loss": 1.0491, + "step": 600, + "time": 14.14 + }, + { + "epoch": 0.58, + "learning_rate": "1.9993e-04", + "loss": 1.1259, + "slid_loss": 1.0505, + "step": 601, + "time": 13.97 + }, + { + "epoch": 0.58, + "learning_rate": "1.9993e-04", + "loss": 1.0563, + "slid_loss": 1.0506, + "step": 602, + "time": 13.79 + }, + { + "epoch": 0.58, + "learning_rate": "1.9992e-04", + "loss": 1.0944, + "slid_loss": 1.05, + "step": 603, + "time": 12.36 + }, + { + "epoch": 0.58, + "learning_rate": "1.9992e-04", + "loss": 1.0569, + "slid_loss": 1.0494, + "step": 604, + "time": 12.2 + }, + { + "epoch": 0.58, + "learning_rate": "1.9992e-04", + "loss": 1.0496, + "slid_loss": 1.0503, + "step": 605, + "time": 13.44 + }, + { + "epoch": 0.58, + "learning_rate": "1.9992e-04", + "loss": 1.0248, + "slid_loss": 1.0505, + "step": 606, + "time": 12.19 + }, + { + "epoch": 0.58, + "learning_rate": "1.9992e-04", + "loss": 1.0156, + "slid_loss": 1.0499, + "step": 607, + "time": 12.83 + }, + { + "epoch": 0.58, + "learning_rate": "1.9991e-04", + "loss": 1.1058, + "slid_loss": 1.0496, + "step": 608, + "time": 14.04 + }, + { + "epoch": 0.59, + "learning_rate": "1.9991e-04", + "loss": 1.1543, + "slid_loss": 1.05, + "step": 609, + "time": 13.7 + }, + { + "epoch": 0.59, + "learning_rate": "1.9991e-04", + "loss": 1.0371, + "slid_loss": 1.0492, + "step": 610, + "time": 14.38 + }, + { + "epoch": 0.59, + "learning_rate": "1.9991e-04", + "loss": 1.11, + "slid_loss": 1.0501, + "step": 611, + "time": 13.64 + }, + { + "epoch": 0.59, + "learning_rate": "1.9991e-04", + "loss": 
1.0472, + "slid_loss": 1.0498, + "step": 612, + "time": 13.86 + }, + { + "epoch": 0.59, + "learning_rate": "1.9990e-04", + "loss": 1.0337, + "slid_loss": 1.0489, + "step": 613, + "time": 12.24 + }, + { + "epoch": 0.59, + "learning_rate": "1.9990e-04", + "loss": 0.9967, + "slid_loss": 1.0478, + "step": 614, + "time": 11.89 + }, + { + "epoch": 0.59, + "learning_rate": "1.9990e-04", + "loss": 1.036, + "slid_loss": 1.0468, + "step": 615, + "time": 13.44 + }, + { + "epoch": 0.59, + "learning_rate": "1.9990e-04", + "loss": 0.988, + "slid_loss": 1.046, + "step": 616, + "time": 11.66 + }, + { + "epoch": 0.59, + "learning_rate": "1.9990e-04", + "loss": 1.01, + "slid_loss": 1.0455, + "step": 617, + "time": 13.3 + }, + { + "epoch": 0.59, + "learning_rate": "1.9989e-04", + "loss": 1.0468, + "slid_loss": 1.0453, + "step": 618, + "time": 14.29 + }, + { + "epoch": 0.59, + "learning_rate": "1.9989e-04", + "loss": 1.0455, + "slid_loss": 1.0443, + "step": 619, + "time": 13.07 + }, + { + "epoch": 0.6, + "learning_rate": "1.9989e-04", + "loss": 1.0149, + "slid_loss": 1.0439, + "step": 620, + "time": 12.87 + }, + { + "epoch": 0.6, + "learning_rate": "1.9989e-04", + "loss": 1.116, + "slid_loss": 1.0457, + "step": 621, + "time": 12.83 + }, + { + "epoch": 0.6, + "learning_rate": "1.9989e-04", + "loss": 1.0674, + "slid_loss": 1.0454, + "step": 622, + "time": 13.72 + }, + { + "epoch": 0.6, + "learning_rate": "1.9988e-04", + "loss": 0.9946, + "slid_loss": 1.0442, + "step": 623, + "time": 14.18 + }, + { + "epoch": 0.6, + "learning_rate": "1.9988e-04", + "loss": 0.946, + "slid_loss": 1.0423, + "step": 624, + "time": 12.69 + }, + { + "epoch": 0.6, + "learning_rate": "1.9988e-04", + "loss": 1.0845, + "slid_loss": 1.043, + "step": 625, + "time": 13.4 + }, + { + "epoch": 0.6, + "learning_rate": "1.9988e-04", + "loss": 1.0035, + "slid_loss": 1.0418, + "step": 626, + "time": 12.73 + }, + { + "epoch": 0.6, + "learning_rate": "1.9987e-04", + "loss": 1.0176, + "slid_loss": 1.0402, + "step": 627, + 
"time": 12.94 + }, + { + "epoch": 0.6, + "learning_rate": "1.9987e-04", + "loss": 1.0003, + "slid_loss": 1.0396, + "step": 628, + "time": 11.22 + }, + { + "epoch": 0.6, + "learning_rate": "1.9987e-04", + "loss": 1.0599, + "slid_loss": 1.0396, + "step": 629, + "time": 12.88 + }, + { + "epoch": 0.61, + "learning_rate": "1.9987e-04", + "loss": 1.1429, + "slid_loss": 1.0398, + "step": 630, + "time": 13.28 + }, + { + "epoch": 0.61, + "learning_rate": "1.9986e-04", + "loss": 0.9407, + "slid_loss": 1.0385, + "step": 631, + "time": 11.73 + }, + { + "epoch": 0.61, + "learning_rate": "1.9986e-04", + "loss": 1.0993, + "slid_loss": 1.0401, + "step": 632, + "time": 13.33 + }, + { + "epoch": 0.61, + "learning_rate": "1.9986e-04", + "loss": 1.0566, + "slid_loss": 1.0411, + "step": 633, + "time": 13.19 + }, + { + "epoch": 0.61, + "learning_rate": "1.9986e-04", + "loss": 0.9537, + "slid_loss": 1.0403, + "step": 634, + "time": 12.92 + }, + { + "epoch": 0.61, + "learning_rate": "1.9985e-04", + "loss": 1.1051, + "slid_loss": 1.0405, + "step": 635, + "time": 12.84 + }, + { + "epoch": 0.61, + "learning_rate": "1.9985e-04", + "loss": 1.0919, + "slid_loss": 1.0405, + "step": 636, + "time": 13.7 + }, + { + "epoch": 0.61, + "learning_rate": "1.9985e-04", + "loss": 0.9882, + "slid_loss": 1.0401, + "step": 637, + "time": 11.73 + }, + { + "epoch": 0.61, + "learning_rate": "1.9985e-04", + "loss": 1.1086, + "slid_loss": 1.0407, + "step": 638, + "time": 12.59 + }, + { + "epoch": 0.61, + "learning_rate": "1.9984e-04", + "loss": 1.0329, + "slid_loss": 1.0418, + "step": 639, + "time": 12.82 + }, + { + "epoch": 0.61, + "learning_rate": "1.9984e-04", + "loss": 1.0164, + "slid_loss": 1.0417, + "step": 640, + "time": 13.29 + }, + { + "epoch": 0.62, + "learning_rate": "1.9984e-04", + "loss": 1.0905, + "slid_loss": 1.0425, + "step": 641, + "time": 14.2 + }, + { + "epoch": 0.62, + "learning_rate": "1.9984e-04", + "loss": 1.127, + "slid_loss": 1.043, + "step": 642, + "time": 13.91 + }, + { + "epoch": 0.62, 
+ "learning_rate": "1.9983e-04", + "loss": 1.1271, + "slid_loss": 1.0428, + "step": 643, + "time": 13.59 + }, + { + "epoch": 0.62, + "learning_rate": "1.9983e-04", + "loss": 1.078, + "slid_loss": 1.0421, + "step": 644, + "time": 13.05 + }, + { + "epoch": 0.62, + "learning_rate": "1.9983e-04", + "loss": 1.0316, + "slid_loss": 1.0404, + "step": 645, + "time": 13.47 + }, + { + "epoch": 0.62, + "learning_rate": "1.9982e-04", + "loss": 0.9629, + "slid_loss": 1.0396, + "step": 646, + "time": 12.76 + }, + { + "epoch": 0.62, + "learning_rate": "1.9982e-04", + "loss": 0.9961, + "slid_loss": 1.0398, + "step": 647, + "time": 13.45 + }, + { + "epoch": 0.62, + "learning_rate": "1.9982e-04", + "loss": 1.0788, + "slid_loss": 1.0393, + "step": 648, + "time": 11.75 + }, + { + "epoch": 0.62, + "learning_rate": "1.9982e-04", + "loss": 1.0963, + "slid_loss": 1.0395, + "step": 649, + "time": 11.44 + }, + { + "epoch": 0.62, + "learning_rate": "1.9981e-04", + "loss": 1.0878, + "slid_loss": 1.04, + "step": 650, + "time": 13.24 + }, + { + "epoch": 0.63, + "learning_rate": "1.9981e-04", + "loss": 1.0527, + "slid_loss": 1.0394, + "step": 651, + "time": 12.81 + }, + { + "epoch": 0.63, + "learning_rate": "1.9981e-04", + "loss": 0.9772, + "slid_loss": 1.0394, + "step": 652, + "time": 13.61 + }, + { + "epoch": 0.63, + "learning_rate": "1.9980e-04", + "loss": 1.085, + "slid_loss": 1.0394, + "step": 653, + "time": 13.82 + }, + { + "epoch": 0.63, + "learning_rate": "1.9980e-04", + "loss": 1.0466, + "slid_loss": 1.0394, + "step": 654, + "time": 13.59 + }, + { + "epoch": 0.63, + "learning_rate": "1.9980e-04", + "loss": 1.0012, + "slid_loss": 1.0383, + "step": 655, + "time": 11.44 + }, + { + "epoch": 0.63, + "learning_rate": "1.9980e-04", + "loss": 1.0059, + "slid_loss": 1.0382, + "step": 656, + "time": 13.54 + }, + { + "epoch": 0.63, + "learning_rate": "1.9979e-04", + "loss": 1.0011, + "slid_loss": 1.0371, + "step": 657, + "time": 13.73 + }, + { + "epoch": 0.63, + "learning_rate": "1.9979e-04", + 
"loss": 1.0159, + "slid_loss": 1.0363, + "step": 658, + "time": 12.09 + }, + { + "epoch": 0.63, + "learning_rate": "1.9979e-04", + "loss": 1.1605, + "slid_loss": 1.0379, + "step": 659, + "time": 13.28 + }, + { + "epoch": 0.63, + "learning_rate": "1.9978e-04", + "loss": 1.0338, + "slid_loss": 1.0373, + "step": 660, + "time": 12.07 + }, + { + "epoch": 0.63, + "learning_rate": "1.9978e-04", + "loss": 1.0183, + "slid_loss": 1.0367, + "step": 661, + "time": 13.55 + }, + { + "epoch": 0.64, + "learning_rate": "1.9978e-04", + "loss": 0.998, + "slid_loss": 1.0361, + "step": 662, + "time": 13.95 + }, + { + "epoch": 0.64, + "learning_rate": "1.9977e-04", + "loss": 1.0519, + "slid_loss": 1.0374, + "step": 663, + "time": 12.42 + }, + { + "epoch": 0.64, + "learning_rate": "1.9977e-04", + "loss": 1.134, + "slid_loss": 1.0382, + "step": 664, + "time": 13.4 + }, + { + "epoch": 0.64, + "learning_rate": "1.9977e-04", + "loss": 1.012, + "slid_loss": 1.0373, + "step": 665, + "time": 13.24 + }, + { + "epoch": 0.64, + "learning_rate": "1.9976e-04", + "loss": 0.9695, + "slid_loss": 1.0375, + "step": 666, + "time": 12.78 + }, + { + "epoch": 0.64, + "learning_rate": "1.9976e-04", + "loss": 0.9398, + "slid_loss": 1.0376, + "step": 667, + "time": 13.42 + }, + { + "epoch": 0.64, + "learning_rate": "1.9976e-04", + "loss": 0.9892, + "slid_loss": 1.0365, + "step": 668, + "time": 12.93 + }, + { + "epoch": 0.64, + "learning_rate": "1.9975e-04", + "loss": 1.0776, + "slid_loss": 1.0366, + "step": 669, + "time": 13.41 + }, + { + "epoch": 0.64, + "learning_rate": "1.9975e-04", + "loss": 0.9244, + "slid_loss": 1.0358, + "step": 670, + "time": 14.82 + }, + { + "epoch": 0.64, + "learning_rate": "1.9975e-04", + "loss": 1.0958, + "slid_loss": 1.0363, + "step": 671, + "time": 13.62 + }, + { + "epoch": 0.65, + "learning_rate": "1.9974e-04", + "loss": 1.11, + "slid_loss": 1.0373, + "step": 672, + "time": 10.89 + }, + { + "epoch": 0.65, + "learning_rate": "1.9974e-04", + "loss": 1.0657, + "slid_loss": 1.0375, + 
"step": 673, + "time": 13.87 + }, + { + "epoch": 0.65, + "learning_rate": "1.9974e-04", + "loss": 0.9387, + "slid_loss": 1.0372, + "step": 674, + "time": 13.0 + }, + { + "epoch": 0.65, + "learning_rate": "1.9973e-04", + "loss": 1.012, + "slid_loss": 1.038, + "step": 675, + "time": 14.32 + }, + { + "epoch": 0.65, + "learning_rate": "1.9973e-04", + "loss": 0.9642, + "slid_loss": 1.0377, + "step": 676, + "time": 12.75 + }, + { + "epoch": 0.65, + "learning_rate": "1.9973e-04", + "loss": 0.992, + "slid_loss": 1.0375, + "step": 677, + "time": 12.78 + }, + { + "epoch": 0.65, + "learning_rate": "1.9972e-04", + "loss": 1.1429, + "slid_loss": 1.037, + "step": 678, + "time": 12.87 + }, + { + "epoch": 0.65, + "learning_rate": "1.9972e-04", + "loss": 1.0107, + "slid_loss": 1.0362, + "step": 679, + "time": 13.95 + }, + { + "epoch": 0.65, + "learning_rate": "1.9972e-04", + "loss": 1.0233, + "slid_loss": 1.0366, + "step": 680, + "time": 13.34 + }, + { + "epoch": 0.65, + "learning_rate": "1.9971e-04", + "loss": 1.025, + "slid_loss": 1.0366, + "step": 681, + "time": 14.06 + }, + { + "epoch": 0.66, + "learning_rate": "1.9971e-04", + "loss": 0.8827, + "slid_loss": 1.0353, + "step": 682, + "time": 13.64 + }, + { + "epoch": 0.66, + "learning_rate": "1.9971e-04", + "loss": 1.0185, + "slid_loss": 1.0339, + "step": 683, + "time": 12.54 + }, + { + "epoch": 0.66, + "learning_rate": "1.9970e-04", + "loss": 1.0916, + "slid_loss": 1.0329, + "step": 684, + "time": 11.81 + }, + { + "epoch": 0.66, + "learning_rate": "1.9970e-04", + "loss": 1.1512, + "slid_loss": 1.0349, + "step": 685, + "time": 13.22 + }, + { + "epoch": 0.66, + "learning_rate": "1.9969e-04", + "loss": 1.033, + "slid_loss": 1.036, + "step": 686, + "time": 12.2 + }, + { + "epoch": 0.66, + "learning_rate": "1.9969e-04", + "loss": 1.0512, + "slid_loss": 1.0364, + "step": 687, + "time": 13.61 + }, + { + "epoch": 0.66, + "learning_rate": "1.9969e-04", + "loss": 1.001, + "slid_loss": 1.0363, + "step": 688, + "time": 12.99 + }, + { + 
"epoch": 0.66, + "learning_rate": "1.9968e-04", + "loss": 1.0962, + "slid_loss": 1.0371, + "step": 689, + "time": 13.4 + }, + { + "epoch": 0.66, + "learning_rate": "1.9968e-04", + "loss": 1.0694, + "slid_loss": 1.0388, + "step": 690, + "time": 13.43 + }, + { + "epoch": 0.66, + "learning_rate": "1.9968e-04", + "loss": 1.0784, + "slid_loss": 1.04, + "step": 691, + "time": 12.79 + }, + { + "epoch": 0.66, + "learning_rate": "1.9967e-04", + "loss": 1.014, + "slid_loss": 1.0408, + "step": 692, + "time": 13.63 + }, + { + "epoch": 0.67, + "learning_rate": "1.9967e-04", + "loss": 1.0197, + "slid_loss": 1.0414, + "step": 693, + "time": 13.47 + }, + { + "epoch": 0.67, + "learning_rate": "1.9966e-04", + "loss": 0.9544, + "slid_loss": 1.0416, + "step": 694, + "time": 13.76 + }, + { + "epoch": 0.67, + "learning_rate": "1.9966e-04", + "loss": 1.0017, + "slid_loss": 1.0402, + "step": 695, + "time": 11.28 + }, + { + "epoch": 0.67, + "learning_rate": "1.9966e-04", + "loss": 1.0345, + "slid_loss": 1.0398, + "step": 696, + "time": 12.56 + }, + { + "epoch": 0.67, + "learning_rate": "1.9965e-04", + "loss": 1.0529, + "slid_loss": 1.0409, + "step": 697, + "time": 12.28 + }, + { + "epoch": 0.67, + "learning_rate": "1.9965e-04", + "loss": 0.9044, + "slid_loss": 1.0399, + "step": 698, + "time": 12.36 + }, + { + "epoch": 0.67, + "learning_rate": "1.9964e-04", + "loss": 1.0369, + "slid_loss": 1.0397, + "step": 699, + "time": 12.27 + }, + { + "epoch": 0.67, + "learning_rate": "1.9964e-04", + "loss": 1.0728, + "slid_loss": 1.0399, + "step": 700, + "time": 13.69 + }, + { + "epoch": 0.67, + "learning_rate": "1.9964e-04", + "loss": 0.9856, + "slid_loss": 1.0385, + "step": 701, + "time": 13.58 + }, + { + "epoch": 0.67, + "learning_rate": "1.9963e-04", + "loss": 0.9269, + "slid_loss": 1.0372, + "step": 702, + "time": 11.8 + }, + { + "epoch": 0.68, + "learning_rate": "1.9963e-04", + "loss": 0.9148, + "slid_loss": 1.0354, + "step": 703, + "time": 12.84 + }, + { + "epoch": 0.68, + "learning_rate": 
"1.9962e-04", + "loss": 1.0185, + "slid_loss": 1.035, + "step": 704, + "time": 11.28 + }, + { + "epoch": 0.68, + "learning_rate": "1.9962e-04", + "loss": 1.0797, + "slid_loss": 1.0353, + "step": 705, + "time": 11.63 + }, + { + "epoch": 0.68, + "learning_rate": "1.9962e-04", + "loss": 1.0627, + "slid_loss": 1.0357, + "step": 706, + "time": 13.45 + }, + { + "epoch": 0.68, + "learning_rate": "1.9961e-04", + "loss": 1.0362, + "slid_loss": 1.0359, + "step": 707, + "time": 12.82 + }, + { + "epoch": 0.68, + "learning_rate": "1.9961e-04", + "loss": 1.018, + "slid_loss": 1.035, + "step": 708, + "time": 13.81 + }, + { + "epoch": 0.68, + "learning_rate": "1.9960e-04", + "loss": 0.9747, + "slid_loss": 1.0332, + "step": 709, + "time": 13.32 + }, + { + "epoch": 0.68, + "learning_rate": "1.9960e-04", + "loss": 1.1148, + "slid_loss": 1.034, + "step": 710, + "time": 13.54 + }, + { + "epoch": 0.68, + "learning_rate": "1.9959e-04", + "loss": 0.9513, + "slid_loss": 1.0324, + "step": 711, + "time": 14.44 + }, + { + "epoch": 0.68, + "learning_rate": "1.9959e-04", + "loss": 1.0239, + "slid_loss": 1.0322, + "step": 712, + "time": 12.81 + }, + { + "epoch": 0.68, + "learning_rate": "1.9959e-04", + "loss": 0.9835, + "slid_loss": 1.0317, + "step": 713, + "time": 13.59 + }, + { + "epoch": 0.69, + "learning_rate": "1.9958e-04", + "loss": 1.0818, + "slid_loss": 1.0326, + "step": 714, + "time": 13.07 + }, + { + "epoch": 0.69, + "learning_rate": "1.9958e-04", + "loss": 1.0648, + "slid_loss": 1.0328, + "step": 715, + "time": 13.87 + }, + { + "epoch": 0.69, + "learning_rate": "1.9957e-04", + "loss": 1.0765, + "slid_loss": 1.0337, + "step": 716, + "time": 12.73 + }, + { + "epoch": 0.69, + "learning_rate": "1.9957e-04", + "loss": 1.1218, + "slid_loss": 1.0348, + "step": 717, + "time": 11.81 + }, + { + "epoch": 0.69, + "learning_rate": "1.9956e-04", + "loss": 1.0908, + "slid_loss": 1.0353, + "step": 718, + "time": 13.83 + }, + { + "epoch": 0.69, + "learning_rate": "1.9956e-04", + "loss": 0.9488, + 
"slid_loss": 1.0343, + "step": 719, + "time": 13.42 + }, + { + "epoch": 0.69, + "learning_rate": "1.9956e-04", + "loss": 1.0339, + "slid_loss": 1.0345, + "step": 720, + "time": 13.79 + }, + { + "epoch": 0.69, + "learning_rate": "1.9955e-04", + "loss": 0.9137, + "slid_loss": 1.0325, + "step": 721, + "time": 11.54 + }, + { + "epoch": 0.69, + "learning_rate": "1.9955e-04", + "loss": 1.0983, + "slid_loss": 1.0328, + "step": 722, + "time": 13.73 + }, + { + "epoch": 0.69, + "learning_rate": "1.9954e-04", + "loss": 0.9561, + "slid_loss": 1.0324, + "step": 723, + "time": 12.89 + }, + { + "epoch": 0.7, + "learning_rate": "1.9954e-04", + "loss": 0.9089, + "slid_loss": 1.032, + "step": 724, + "time": 13.27 + }, + { + "epoch": 0.7, + "learning_rate": "1.9953e-04", + "loss": 0.9925, + "slid_loss": 1.0311, + "step": 725, + "time": 13.72 + }, + { + "epoch": 0.7, + "learning_rate": "1.9953e-04", + "loss": 1.0612, + "slid_loss": 1.0317, + "step": 726, + "time": 13.2 + }, + { + "epoch": 0.7, + "learning_rate": "1.9952e-04", + "loss": 0.9549, + "slid_loss": 1.0311, + "step": 727, + "time": 13.37 + }, + { + "epoch": 0.7, + "learning_rate": "1.9952e-04", + "loss": 1.0286, + "slid_loss": 1.0313, + "step": 728, + "time": 12.05 + }, + { + "epoch": 0.7, + "learning_rate": "1.9951e-04", + "loss": 1.0648, + "slid_loss": 1.0314, + "step": 729, + "time": 12.0 + }, + { + "epoch": 0.7, + "learning_rate": "1.9951e-04", + "loss": 1.0779, + "slid_loss": 1.0307, + "step": 730, + "time": 14.26 + }, + { + "epoch": 0.7, + "learning_rate": "1.9950e-04", + "loss": 1.0435, + "slid_loss": 1.0318, + "step": 731, + "time": 13.54 + }, + { + "epoch": 0.7, + "learning_rate": "1.9950e-04", + "loss": 1.0903, + "slid_loss": 1.0317, + "step": 732, + "time": 13.62 + }, + { + "epoch": 0.7, + "learning_rate": "1.9950e-04", + "loss": 1.0505, + "slid_loss": 1.0316, + "step": 733, + "time": 13.11 + }, + { + "epoch": 0.71, + "learning_rate": "1.9949e-04", + "loss": 0.9809, + "slid_loss": 1.0319, + "step": 734, + "time": 
12.96 + }, + { + "epoch": 0.71, + "learning_rate": "1.9949e-04", + "loss": 0.9029, + "slid_loss": 1.0299, + "step": 735, + "time": 13.84 + }, + { + "epoch": 0.71, + "learning_rate": "1.9948e-04", + "loss": 1.0465, + "slid_loss": 1.0294, + "step": 736, + "time": 13.08 + }, + { + "epoch": 0.71, + "learning_rate": "1.9948e-04", + "loss": 1.0225, + "slid_loss": 1.0298, + "step": 737, + "time": 13.65 + }, + { + "epoch": 0.71, + "learning_rate": "1.9947e-04", + "loss": 1.0417, + "slid_loss": 1.0291, + "step": 738, + "time": 15.45 + }, + { + "epoch": 0.71, + "learning_rate": "1.9947e-04", + "loss": 1.0021, + "slid_loss": 1.0288, + "step": 739, + "time": 13.33 + }, + { + "epoch": 0.71, + "learning_rate": "1.9946e-04", + "loss": 0.8816, + "slid_loss": 1.0274, + "step": 740, + "time": 11.75 + }, + { + "epoch": 0.71, + "learning_rate": "1.9946e-04", + "loss": 1.1319, + "slid_loss": 1.0278, + "step": 741, + "time": 13.01 + }, + { + "epoch": 0.71, + "learning_rate": "1.9945e-04", + "loss": 1.0488, + "slid_loss": 1.0271, + "step": 742, + "time": 10.85 + }, + { + "epoch": 0.71, + "learning_rate": "1.9945e-04", + "loss": 1.0818, + "slid_loss": 1.0266, + "step": 743, + "time": 12.4 + }, + { + "epoch": 0.71, + "learning_rate": "1.9944e-04", + "loss": 1.0643, + "slid_loss": 1.0265, + "step": 744, + "time": 13.14 + }, + { + "epoch": 0.72, + "learning_rate": "1.9944e-04", + "loss": 1.0599, + "slid_loss": 1.0268, + "step": 745, + "time": 13.99 + }, + { + "epoch": 0.72, + "learning_rate": "1.9943e-04", + "loss": 1.0051, + "slid_loss": 1.0272, + "step": 746, + "time": 13.3 + }, + { + "epoch": 0.72, + "learning_rate": "1.9943e-04", + "loss": 1.0436, + "slid_loss": 1.0277, + "step": 747, + "time": 14.14 + }, + { + "epoch": 0.72, + "learning_rate": "1.9942e-04", + "loss": 1.0075, + "slid_loss": 1.0269, + "step": 748, + "time": 12.16 + }, + { + "epoch": 0.72, + "learning_rate": "1.9942e-04", + "loss": 0.994, + "slid_loss": 1.0259, + "step": 749, + "time": 13.84 + }, + { + "epoch": 0.72, + 
"learning_rate": "1.9941e-04", + "loss": 1.0013, + "slid_loss": 1.0251, + "step": 750, + "time": 13.67 + }, + { + "epoch": 0.72, + "learning_rate": "1.9941e-04", + "loss": 0.8608, + "slid_loss": 1.0231, + "step": 751, + "time": 12.81 + }, + { + "epoch": 0.72, + "learning_rate": "1.9940e-04", + "loss": 0.8892, + "slid_loss": 1.0223, + "step": 752, + "time": 12.83 + }, + { + "epoch": 0.72, + "learning_rate": "1.9940e-04", + "loss": 1.0035, + "slid_loss": 1.0214, + "step": 753, + "time": 13.43 + }, + { + "epoch": 0.72, + "learning_rate": "1.9939e-04", + "loss": 0.9436, + "slid_loss": 1.0204, + "step": 754, + "time": 12.22 + }, + { + "epoch": 0.73, + "learning_rate": "1.9939e-04", + "loss": 1.0129, + "slid_loss": 1.0205, + "step": 755, + "time": 13.48 + }, + { + "epoch": 0.73, + "learning_rate": "1.9938e-04", + "loss": 0.9239, + "slid_loss": 1.0197, + "step": 756, + "time": 13.14 + }, + { + "epoch": 0.73, + "learning_rate": "1.9937e-04", + "loss": 0.9885, + "slid_loss": 1.0196, + "step": 757, + "time": 13.44 + }, + { + "epoch": 0.73, + "learning_rate": "1.9937e-04", + "loss": 0.9389, + "slid_loss": 1.0188, + "step": 758, + "time": 13.37 + }, + { + "epoch": 0.73, + "learning_rate": "1.9936e-04", + "loss": 1.0808, + "slid_loss": 1.018, + "step": 759, + "time": 11.7 + }, + { + "epoch": 0.73, + "learning_rate": "1.9936e-04", + "loss": 0.9554, + "slid_loss": 1.0172, + "step": 760, + "time": 13.8 + }, + { + "epoch": 0.73, + "learning_rate": "1.9935e-04", + "loss": 1.0492, + "slid_loss": 1.0175, + "step": 761, + "time": 11.93 + }, + { + "epoch": 0.73, + "learning_rate": "1.9935e-04", + "loss": 0.9557, + "slid_loss": 1.0171, + "step": 762, + "time": 13.57 + }, + { + "epoch": 0.73, + "learning_rate": "1.9934e-04", + "loss": 1.0079, + "slid_loss": 1.0167, + "step": 763, + "time": 13.2 + }, + { + "epoch": 0.73, + "learning_rate": "1.9934e-04", + "loss": 1.0476, + "slid_loss": 1.0158, + "step": 764, + "time": 14.08 + }, + { + "epoch": 0.73, + "learning_rate": "1.9933e-04", + 
"loss": 1.0188, + "slid_loss": 1.0159, + "step": 765, + "time": 12.29 + }, + { + "epoch": 0.74, + "learning_rate": "1.9933e-04", + "loss": 1.0088, + "slid_loss": 1.0163, + "step": 766, + "time": 13.0 + }, + { + "epoch": 0.74, + "learning_rate": "1.9932e-04", + "loss": 0.9589, + "slid_loss": 1.0165, + "step": 767, + "time": 14.26 + }, + { + "epoch": 0.74, + "learning_rate": "1.9932e-04", + "loss": 1.0309, + "slid_loss": 1.0169, + "step": 768, + "time": 13.78 + }, + { + "epoch": 0.74, + "learning_rate": "1.9931e-04", + "loss": 1.079, + "slid_loss": 1.0169, + "step": 769, + "time": 13.73 + }, + { + "epoch": 0.74, + "learning_rate": "1.9930e-04", + "loss": 0.9621, + "slid_loss": 1.0173, + "step": 770, + "time": 12.89 + }, + { + "epoch": 0.74, + "learning_rate": "1.9930e-04", + "loss": 0.8697, + "slid_loss": 1.015, + "step": 771, + "time": 12.9 + }, + { + "epoch": 0.74, + "learning_rate": "1.9929e-04", + "loss": 0.9752, + "slid_loss": 1.0137, + "step": 772, + "time": 13.62 + }, + { + "epoch": 0.74, + "learning_rate": "1.9929e-04", + "loss": 1.1629, + "slid_loss": 1.0146, + "step": 773, + "time": 13.2 + }, + { + "epoch": 0.74, + "learning_rate": "1.9928e-04", + "loss": 0.9809, + "slid_loss": 1.0151, + "step": 774, + "time": 13.23 + }, + { + "epoch": 0.74, + "learning_rate": "1.9928e-04", + "loss": 1.0596, + "slid_loss": 1.0155, + "step": 775, + "time": 12.28 + }, + { + "epoch": 0.75, + "learning_rate": "1.9927e-04", + "loss": 1.0694, + "slid_loss": 1.0166, + "step": 776, + "time": 13.15 + }, + { + "epoch": 0.75, + "learning_rate": "1.9926e-04", + "loss": 1.0207, + "slid_loss": 1.0169, + "step": 777, + "time": 12.85 + }, + { + "epoch": 0.75, + "learning_rate": "1.9926e-04", + "loss": 1.0557, + "slid_loss": 1.016, + "step": 778, + "time": 13.39 + }, + { + "epoch": 0.75, + "learning_rate": "1.9925e-04", + "loss": 1.061, + "slid_loss": 1.0165, + "step": 779, + "time": 11.34 + }, + { + "epoch": 0.75, + "learning_rate": "1.9925e-04", + "loss": 1.1373, + "slid_loss": 1.0176, + 
"step": 780, + "time": 13.43 + }, + { + "epoch": 0.75, + "learning_rate": "1.9924e-04", + "loss": 1.0246, + "slid_loss": 1.0176, + "step": 781, + "time": 12.77 + }, + { + "epoch": 0.75, + "learning_rate": "1.9924e-04", + "loss": 0.9139, + "slid_loss": 1.018, + "step": 782, + "time": 12.2 + }, + { + "epoch": 0.75, + "learning_rate": "1.9923e-04", + "loss": 1.0051, + "slid_loss": 1.0178, + "step": 783, + "time": 13.71 + }, + { + "epoch": 0.75, + "learning_rate": "1.9922e-04", + "loss": 0.9709, + "slid_loss": 1.0166, + "step": 784, + "time": 13.45 + }, + { + "epoch": 0.75, + "learning_rate": "1.9922e-04", + "loss": 1.0337, + "slid_loss": 1.0154, + "step": 785, + "time": 12.71 + }, + { + "epoch": 0.76, + "learning_rate": "1.9921e-04", + "loss": 1.0183, + "slid_loss": 1.0153, + "step": 786, + "time": 12.27 + }, + { + "epoch": 0.76, + "learning_rate": "1.9921e-04", + "loss": 1.0356, + "slid_loss": 1.0151, + "step": 787, + "time": 13.63 + }, + { + "epoch": 0.76, + "learning_rate": "1.9920e-04", + "loss": 0.9522, + "slid_loss": 1.0146, + "step": 788, + "time": 12.64 + }, + { + "epoch": 0.76, + "learning_rate": "1.9919e-04", + "loss": 0.9984, + "slid_loss": 1.0137, + "step": 789, + "time": 12.9 + }, + { + "epoch": 0.76, + "learning_rate": "1.9919e-04", + "loss": 1.0674, + "slid_loss": 1.0136, + "step": 790, + "time": 11.98 + }, + { + "epoch": 0.76, + "learning_rate": "1.9918e-04", + "loss": 1.1005, + "slid_loss": 1.0139, + "step": 791, + "time": 13.38 + }, + { + "epoch": 0.76, + "learning_rate": "1.9918e-04", + "loss": 1.0685, + "slid_loss": 1.0144, + "step": 792, + "time": 13.48 + }, + { + "epoch": 0.76, + "learning_rate": "1.9917e-04", + "loss": 0.9861, + "slid_loss": 1.0141, + "step": 793, + "time": 12.81 + }, + { + "epoch": 0.76, + "learning_rate": "1.9916e-04", + "loss": 1.0828, + "slid_loss": 1.0154, + "step": 794, + "time": 13.44 + }, + { + "epoch": 0.76, + "learning_rate": "1.9916e-04", + "loss": 0.9249, + "slid_loss": 1.0146, + "step": 795, + "time": 13.54 + }, + { 
+ "epoch": 0.76, + "learning_rate": "1.9915e-04", + "loss": 0.988, + "slid_loss": 1.0141, + "step": 796, + "time": 14.08 + }, + { + "epoch": 0.77, + "learning_rate": "1.9915e-04", + "loss": 1.0391, + "slid_loss": 1.014, + "step": 797, + "time": 13.39 + }, + { + "epoch": 0.77, + "learning_rate": "1.9914e-04", + "loss": 1.0448, + "slid_loss": 1.0154, + "step": 798, + "time": 11.38 + }, + { + "epoch": 0.77, + "learning_rate": "1.9913e-04", + "loss": 1.0193, + "slid_loss": 1.0152, + "step": 799, + "time": 12.58 + }, + { + "epoch": 0.77, + "learning_rate": "1.9913e-04", + "loss": 0.9802, + "slid_loss": 1.0143, + "step": 800, + "time": 11.92 + }, + { + "epoch": 0.77, + "learning_rate": "1.9912e-04", + "loss": 0.9691, + "slid_loss": 1.0141, + "step": 801, + "time": 13.18 + }, + { + "epoch": 0.77, + "learning_rate": "1.9911e-04", + "loss": 1.178, + "slid_loss": 1.0166, + "step": 802, + "time": 12.85 + }, + { + "epoch": 0.77, + "learning_rate": "1.9911e-04", + "loss": 0.9051, + "slid_loss": 1.0165, + "step": 803, + "time": 12.87 + }, + { + "epoch": 0.77, + "learning_rate": "1.9910e-04", + "loss": 0.8386, + "slid_loss": 1.0147, + "step": 804, + "time": 14.14 + }, + { + "epoch": 0.77, + "learning_rate": "1.9910e-04", + "loss": 0.9707, + "slid_loss": 1.0137, + "step": 805, + "time": 13.6 + }, + { + "epoch": 0.77, + "learning_rate": "1.9909e-04", + "loss": 1.0228, + "slid_loss": 1.0133, + "step": 806, + "time": 11.17 + }, + { + "epoch": 0.78, + "learning_rate": "1.9908e-04", + "loss": 0.9951, + "slid_loss": 1.0128, + "step": 807, + "time": 12.94 + }, + { + "epoch": 0.78, + "learning_rate": "1.9908e-04", + "loss": 0.9642, + "slid_loss": 1.0123, + "step": 808, + "time": 12.38 + }, + { + "epoch": 0.78, + "learning_rate": "1.9907e-04", + "loss": 0.9216, + "slid_loss": 1.0118, + "step": 809, + "time": 13.01 + }, + { + "epoch": 0.78, + "learning_rate": "1.9906e-04", + "loss": 0.9235, + "slid_loss": 1.0099, + "step": 810, + "time": 13.19 + }, + { + "epoch": 0.78, + "learning_rate": 
"1.9906e-04", + "loss": 1.06, + "slid_loss": 1.0109, + "step": 811, + "time": 13.56 + }, + { + "epoch": 0.78, + "learning_rate": "1.9905e-04", + "loss": 1.0032, + "slid_loss": 1.0107, + "step": 812, + "time": 14.05 + }, + { + "epoch": 0.78, + "learning_rate": "1.9904e-04", + "loss": 1.0168, + "slid_loss": 1.0111, + "step": 813, + "time": 13.28 + }, + { + "epoch": 0.78, + "learning_rate": "1.9904e-04", + "loss": 1.0079, + "slid_loss": 1.0103, + "step": 814, + "time": 13.32 + }, + { + "epoch": 0.78, + "learning_rate": "1.9903e-04", + "loss": 0.9326, + "slid_loss": 1.009, + "step": 815, + "time": 11.06 + }, + { + "epoch": 0.78, + "learning_rate": "1.9902e-04", + "loss": 1.0019, + "slid_loss": 1.0083, + "step": 816, + "time": 11.96 + }, + { + "epoch": 0.78, + "learning_rate": "1.9902e-04", + "loss": 0.9545, + "slid_loss": 1.0066, + "step": 817, + "time": 13.87 + }, + { + "epoch": 0.79, + "learning_rate": "1.9901e-04", + "loss": 1.0344, + "slid_loss": 1.006, + "step": 818, + "time": 13.38 + }, + { + "epoch": 0.79, + "learning_rate": "1.9900e-04", + "loss": 1.0008, + "slid_loss": 1.0066, + "step": 819, + "time": 12.88 + }, + { + "epoch": 0.79, + "learning_rate": "1.9900e-04", + "loss": 0.9723, + "slid_loss": 1.0059, + "step": 820, + "time": 13.79 + }, + { + "epoch": 0.79, + "learning_rate": "1.9899e-04", + "loss": 1.0433, + "slid_loss": 1.0072, + "step": 821, + "time": 13.25 + }, + { + "epoch": 0.79, + "learning_rate": "1.9898e-04", + "loss": 1.0467, + "slid_loss": 1.0067, + "step": 822, + "time": 13.1 + }, + { + "epoch": 0.79, + "learning_rate": "1.9898e-04", + "loss": 1.008, + "slid_loss": 1.0072, + "step": 823, + "time": 13.24 + }, + { + "epoch": 0.79, + "learning_rate": "1.9897e-04", + "loss": 1.0883, + "slid_loss": 1.009, + "step": 824, + "time": 13.23 + }, + { + "epoch": 0.79, + "learning_rate": "1.9896e-04", + "loss": 1.0326, + "slid_loss": 1.0094, + "step": 825, + "time": 13.75 + }, + { + "epoch": 0.79, + "learning_rate": "1.9896e-04", + "loss": 1.0183, + 
"slid_loss": 1.009, + "step": 826, + "time": 12.8 + }, + { + "epoch": 0.79, + "learning_rate": "1.9895e-04", + "loss": 0.9839, + "slid_loss": 1.0093, + "step": 827, + "time": 13.46 + }, + { + "epoch": 0.8, + "learning_rate": "1.9894e-04", + "loss": 1.0894, + "slid_loss": 1.0099, + "step": 828, + "time": 12.91 + }, + { + "epoch": 0.8, + "learning_rate": "1.9894e-04", + "loss": 1.0159, + "slid_loss": 1.0094, + "step": 829, + "time": 12.81 + }, + { + "epoch": 0.8, + "learning_rate": "1.9893e-04", + "loss": 0.9974, + "slid_loss": 1.0086, + "step": 830, + "time": 13.63 + }, + { + "epoch": 0.8, + "learning_rate": "1.9892e-04", + "loss": 1.0866, + "slid_loss": 1.009, + "step": 831, + "time": 13.59 + }, + { + "epoch": 0.8, + "learning_rate": "1.9892e-04", + "loss": 0.9809, + "slid_loss": 1.0079, + "step": 832, + "time": 13.33 + }, + { + "epoch": 0.8, + "learning_rate": "1.9891e-04", + "loss": 0.9967, + "slid_loss": 1.0074, + "step": 833, + "time": 13.67 + }, + { + "epoch": 0.8, + "learning_rate": "1.9890e-04", + "loss": 1.0102, + "slid_loss": 1.0077, + "step": 834, + "time": 12.24 + }, + { + "epoch": 0.8, + "learning_rate": "1.9890e-04", + "loss": 1.0953, + "slid_loss": 1.0096, + "step": 835, + "time": 13.63 + }, + { + "epoch": 0.8, + "learning_rate": "1.9889e-04", + "loss": 1.0098, + "slid_loss": 1.0093, + "step": 836, + "time": 13.33 + }, + { + "epoch": 0.8, + "learning_rate": "1.9888e-04", + "loss": 0.9716, + "slid_loss": 1.0087, + "step": 837, + "time": 13.79 + }, + { + "epoch": 0.8, + "learning_rate": "1.9887e-04", + "loss": 1.004, + "slid_loss": 1.0084, + "step": 838, + "time": 14.06 + }, + { + "epoch": 0.81, + "learning_rate": "1.9887e-04", + "loss": 0.9311, + "slid_loss": 1.0077, + "step": 839, + "time": 13.39 + }, + { + "epoch": 0.81, + "learning_rate": "1.9886e-04", + "loss": 1.1044, + "slid_loss": 1.0099, + "step": 840, + "time": 14.15 + }, + { + "epoch": 0.81, + "learning_rate": "1.9885e-04", + "loss": 1.0054, + "slid_loss": 1.0086, + "step": 841, + "time": 
12.35 + }, + { + "epoch": 0.81, + "learning_rate": "1.9885e-04", + "loss": 1.0644, + "slid_loss": 1.0088, + "step": 842, + "time": 13.15 + }, + { + "epoch": 0.81, + "learning_rate": "1.9884e-04", + "loss": 1.0625, + "slid_loss": 1.0086, + "step": 843, + "time": 11.07 + }, + { + "epoch": 0.81, + "learning_rate": "1.9883e-04", + "loss": 0.9804, + "slid_loss": 1.0077, + "step": 844, + "time": 13.4 + }, + { + "epoch": 0.81, + "learning_rate": "1.9882e-04", + "loss": 1.0813, + "slid_loss": 1.008, + "step": 845, + "time": 13.14 + }, + { + "epoch": 0.81, + "learning_rate": "1.9882e-04", + "loss": 1.1213, + "slid_loss": 1.0091, + "step": 846, + "time": 13.86 + }, + { + "epoch": 0.81, + "learning_rate": "1.9881e-04", + "loss": 1.0148, + "slid_loss": 1.0088, + "step": 847, + "time": 13.13 + }, + { + "epoch": 0.81, + "learning_rate": "1.9880e-04", + "loss": 1.0477, + "slid_loss": 1.0092, + "step": 848, + "time": 11.54 + }, + { + "epoch": 0.82, + "learning_rate": "1.9879e-04", + "loss": 0.9842, + "slid_loss": 1.0091, + "step": 849, + "time": 11.95 + }, + { + "epoch": 0.82, + "learning_rate": "1.9879e-04", + "loss": 1.0497, + "slid_loss": 1.0096, + "step": 850, + "time": 11.62 + }, + { + "epoch": 0.82, + "learning_rate": "1.9878e-04", + "loss": 0.9359, + "slid_loss": 1.0104, + "step": 851, + "time": 13.4 + }, + { + "epoch": 0.82, + "learning_rate": "1.9877e-04", + "loss": 1.1212, + "slid_loss": 1.0127, + "step": 852, + "time": 14.07 + }, + { + "epoch": 0.82, + "learning_rate": "1.9877e-04", + "loss": 0.9765, + "slid_loss": 1.0124, + "step": 853, + "time": 11.32 + }, + { + "epoch": 0.82, + "learning_rate": "1.9876e-04", + "loss": 0.9339, + "slid_loss": 1.0123, + "step": 854, + "time": 13.27 + }, + { + "epoch": 0.82, + "learning_rate": "1.9875e-04", + "loss": 1.0383, + "slid_loss": 1.0126, + "step": 855, + "time": 13.74 + }, + { + "epoch": 0.82, + "learning_rate": "1.9874e-04", + "loss": 0.9501, + "slid_loss": 1.0128, + "step": 856, + "time": 13.37 + }, + { + "epoch": 0.82, + 
"learning_rate": "1.9874e-04", + "loss": 0.9437, + "slid_loss": 1.0124, + "step": 857, + "time": 10.72 + }, + { + "epoch": 0.82, + "learning_rate": "1.9873e-04", + "loss": 0.9241, + "slid_loss": 1.0122, + "step": 858, + "time": 11.27 + }, + { + "epoch": 0.83, + "learning_rate": "1.9872e-04", + "loss": 0.9676, + "slid_loss": 1.0111, + "step": 859, + "time": 13.36 + }, + { + "epoch": 0.83, + "learning_rate": "1.9871e-04", + "loss": 1.0482, + "slid_loss": 1.012, + "step": 860, + "time": 12.76 + }, + { + "epoch": 0.83, + "learning_rate": "1.9871e-04", + "loss": 0.9695, + "slid_loss": 1.0112, + "step": 861, + "time": 13.16 + }, + { + "epoch": 0.83, + "learning_rate": "1.9870e-04", + "loss": 0.9381, + "slid_loss": 1.0111, + "step": 862, + "time": 11.15 + }, + { + "epoch": 0.83, + "learning_rate": "1.9869e-04", + "loss": 0.9073, + "slid_loss": 1.0101, + "step": 863, + "time": 11.9 + }, + { + "epoch": 0.83, + "learning_rate": "1.9868e-04", + "loss": 0.9883, + "slid_loss": 1.0095, + "step": 864, + "time": 13.47 + }, + { + "epoch": 0.83, + "learning_rate": "1.9868e-04", + "loss": 1.0486, + "slid_loss": 1.0098, + "step": 865, + "time": 13.33 + }, + { + "epoch": 0.83, + "learning_rate": "1.9867e-04", + "loss": 1.0283, + "slid_loss": 1.01, + "step": 866, + "time": 13.65 + }, + { + "epoch": 0.83, + "learning_rate": "1.9866e-04", + "loss": 0.8641, + "slid_loss": 1.009, + "step": 867, + "time": 14.34 + }, + { + "epoch": 0.83, + "learning_rate": "1.9865e-04", + "loss": 1.0782, + "slid_loss": 1.0095, + "step": 868, + "time": 14.0 + }, + { + "epoch": 0.83, + "learning_rate": "1.9864e-04", + "loss": 0.9565, + "slid_loss": 1.0083, + "step": 869, + "time": 13.45 + }, + { + "epoch": 0.84, + "learning_rate": "1.9864e-04", + "loss": 1.0361, + "slid_loss": 1.009, + "step": 870, + "time": 11.26 + }, + { + "epoch": 0.84, + "learning_rate": "1.9863e-04", + "loss": 0.9371, + "slid_loss": 1.0097, + "step": 871, + "time": 11.64 + }, + { + "epoch": 0.84, + "learning_rate": "1.9862e-04", + "loss": 
0.977, + "slid_loss": 1.0097, + "step": 872, + "time": 13.16 + }, + { + "epoch": 0.84, + "learning_rate": "1.9861e-04", + "loss": 0.9393, + "slid_loss": 1.0075, + "step": 873, + "time": 11.67 + }, + { + "epoch": 0.84, + "learning_rate": "1.9861e-04", + "loss": 1.0374, + "slid_loss": 1.008, + "step": 874, + "time": 13.16 + }, + { + "epoch": 0.84, + "learning_rate": "1.9860e-04", + "loss": 0.8848, + "slid_loss": 1.0063, + "step": 875, + "time": 12.25 + }, + { + "epoch": 0.84, + "learning_rate": "1.9859e-04", + "loss": 1.0191, + "slid_loss": 1.0058, + "step": 876, + "time": 13.55 + }, + { + "epoch": 0.84, + "learning_rate": "1.9858e-04", + "loss": 1.0166, + "slid_loss": 1.0057, + "step": 877, + "time": 11.8 + }, + { + "epoch": 0.84, + "learning_rate": "1.9857e-04", + "loss": 0.9387, + "slid_loss": 1.0046, + "step": 878, + "time": 13.02 + }, + { + "epoch": 0.84, + "learning_rate": "1.9857e-04", + "loss": 1.1353, + "slid_loss": 1.0053, + "step": 879, + "time": 13.41 + }, + { + "epoch": 0.85, + "learning_rate": "1.9856e-04", + "loss": 0.9761, + "slid_loss": 1.0037, + "step": 880, + "time": 12.28 + }, + { + "epoch": 0.85, + "learning_rate": "1.9855e-04", + "loss": 1.0366, + "slid_loss": 1.0038, + "step": 881, + "time": 12.8 + }, + { + "epoch": 0.85, + "learning_rate": "1.9854e-04", + "loss": 0.9963, + "slid_loss": 1.0046, + "step": 882, + "time": 13.15 + }, + { + "epoch": 0.85, + "learning_rate": "1.9853e-04", + "loss": 0.9585, + "slid_loss": 1.0042, + "step": 883, + "time": 13.67 + }, + { + "epoch": 0.85, + "learning_rate": "1.9853e-04", + "loss": 1.0659, + "slid_loss": 1.0051, + "step": 884, + "time": 12.9 + }, + { + "epoch": 0.85, + "learning_rate": "1.9852e-04", + "loss": 1.0106, + "slid_loss": 1.0049, + "step": 885, + "time": 13.34 + }, + { + "epoch": 0.85, + "learning_rate": "1.9851e-04", + "loss": 1.0823, + "slid_loss": 1.0055, + "step": 886, + "time": 11.08 + }, + { + "epoch": 0.85, + "learning_rate": "1.9850e-04", + "loss": 1.0362, + "slid_loss": 1.0055, + 
"step": 887, + "time": 11.72 + }, + { + "epoch": 0.85, + "learning_rate": "1.9849e-04", + "loss": 0.944, + "slid_loss": 1.0055, + "step": 888, + "time": 11.59 + }, + { + "epoch": 0.85, + "learning_rate": "1.9848e-04", + "loss": 0.9746, + "slid_loss": 1.0052, + "step": 889, + "time": 11.58 + }, + { + "epoch": 0.85, + "learning_rate": "1.9848e-04", + "loss": 0.9764, + "slid_loss": 1.0043, + "step": 890, + "time": 12.61 + }, + { + "epoch": 0.86, + "learning_rate": "1.9847e-04", + "loss": 1.0767, + "slid_loss": 1.0041, + "step": 891, + "time": 12.97 + }, + { + "epoch": 0.86, + "learning_rate": "1.9846e-04", + "loss": 0.9895, + "slid_loss": 1.0033, + "step": 892, + "time": 13.39 + }, + { + "epoch": 0.86, + "learning_rate": "1.9845e-04", + "loss": 1.0639, + "slid_loss": 1.0041, + "step": 893, + "time": 13.13 + }, + { + "epoch": 0.86, + "learning_rate": "1.9844e-04", + "loss": 0.971, + "slid_loss": 1.0029, + "step": 894, + "time": 15.02 + }, + { + "epoch": 0.86, + "learning_rate": "1.9844e-04", + "loss": 0.9143, + "slid_loss": 1.0028, + "step": 895, + "time": 13.23 + }, + { + "epoch": 0.86, + "learning_rate": "1.9843e-04", + "loss": 0.9306, + "slid_loss": 1.0023, + "step": 896, + "time": 13.02 + }, + { + "epoch": 0.86, + "learning_rate": "1.9842e-04", + "loss": 0.9732, + "slid_loss": 1.0016, + "step": 897, + "time": 13.58 + }, + { + "epoch": 0.86, + "learning_rate": "1.9841e-04", + "loss": 0.8705, + "slid_loss": 0.9999, + "step": 898, + "time": 12.76 + }, + { + "epoch": 0.86, + "learning_rate": "1.9840e-04", + "loss": 1.1178, + "slid_loss": 1.0008, + "step": 899, + "time": 13.4 + }, + { + "epoch": 0.86, + "learning_rate": "1.9839e-04", + "loss": 1.0061, + "slid_loss": 1.0011, + "step": 900, + "time": 12.55 + }, + { + "epoch": 0.87, + "learning_rate": "1.9838e-04", + "loss": 0.9413, + "slid_loss": 1.0008, + "step": 901, + "time": 12.02 + }, + { + "epoch": 0.87, + "learning_rate": "1.9838e-04", + "loss": 0.919, + "slid_loss": 0.9982, + "step": 902, + "time": 13.56 + }, + { 
+ "epoch": 0.87, + "learning_rate": "1.9837e-04", + "loss": 0.9952, + "slid_loss": 0.9991, + "step": 903, + "time": 13.15 + }, + { + "epoch": 0.87, + "learning_rate": "1.9836e-04", + "loss": 0.9613, + "slid_loss": 1.0004, + "step": 904, + "time": 13.41 + }, + { + "epoch": 0.87, + "learning_rate": "1.9835e-04", + "loss": 0.8695, + "slid_loss": 0.9993, + "step": 905, + "time": 12.81 + }, + { + "epoch": 0.87, + "learning_rate": "1.9834e-04", + "loss": 1.0116, + "slid_loss": 0.9992, + "step": 906, + "time": 14.25 + }, + { + "epoch": 0.87, + "learning_rate": "1.9833e-04", + "loss": 0.9132, + "slid_loss": 0.9984, + "step": 907, + "time": 13.97 + }, + { + "epoch": 0.87, + "learning_rate": "1.9833e-04", + "loss": 0.9325, + "slid_loss": 0.9981, + "step": 908, + "time": 12.81 + }, + { + "epoch": 0.87, + "learning_rate": "1.9832e-04", + "loss": 0.9653, + "slid_loss": 0.9985, + "step": 909, + "time": 13.37 + }, + { + "epoch": 0.87, + "learning_rate": "1.9831e-04", + "loss": 0.9469, + "slid_loss": 0.9988, + "step": 910, + "time": 13.21 + }, + { + "epoch": 0.88, + "learning_rate": "1.9830e-04", + "loss": 1.0618, + "slid_loss": 0.9988, + "step": 911, + "time": 13.79 + }, + { + "epoch": 0.88, + "learning_rate": "1.9829e-04", + "loss": 1.0181, + "slid_loss": 0.9989, + "step": 912, + "time": 13.09 + }, + { + "epoch": 0.88, + "learning_rate": "1.9828e-04", + "loss": 0.9328, + "slid_loss": 0.9981, + "step": 913, + "time": 13.3 + }, + { + "epoch": 0.88, + "learning_rate": "1.9827e-04", + "loss": 1.0416, + "slid_loss": 0.9984, + "step": 914, + "time": 13.81 + }, + { + "epoch": 0.88, + "learning_rate": "1.9826e-04", + "loss": 1.0824, + "slid_loss": 0.9999, + "step": 915, + "time": 13.18 + }, + { + "epoch": 0.88, + "learning_rate": "1.9826e-04", + "loss": 1.0186, + "slid_loss": 1.0001, + "step": 916, + "time": 11.26 + }, + { + "epoch": 0.88, + "learning_rate": "1.9825e-04", + "loss": 0.9815, + "slid_loss": 1.0004, + "step": 917, + "time": 12.98 + }, + { + "epoch": 0.88, + "learning_rate": 
"1.9824e-04", + "loss": 1.0509, + "slid_loss": 1.0005, + "step": 918, + "time": 13.31 + }, + { + "epoch": 0.88, + "learning_rate": "1.9823e-04", + "loss": 1.0622, + "slid_loss": 1.0011, + "step": 919, + "time": 12.84 + }, + { + "epoch": 0.88, + "learning_rate": "1.9822e-04", + "loss": 0.9517, + "slid_loss": 1.0009, + "step": 920, + "time": 11.9 + }, + { + "epoch": 0.88, + "learning_rate": "1.9821e-04", + "loss": 1.021, + "slid_loss": 1.0007, + "step": 921, + "time": 13.68 + }, + { + "epoch": 0.89, + "learning_rate": "1.9820e-04", + "loss": 0.9134, + "slid_loss": 0.9994, + "step": 922, + "time": 12.39 + }, + { + "epoch": 0.89, + "learning_rate": "1.9819e-04", + "loss": 0.8823, + "slid_loss": 0.9981, + "step": 923, + "time": 11.68 + }, + { + "epoch": 0.89, + "learning_rate": "1.9818e-04", + "loss": 1.0961, + "slid_loss": 0.9982, + "step": 924, + "time": 13.34 + }, + { + "epoch": 0.89, + "learning_rate": "1.9818e-04", + "loss": 0.9627, + "slid_loss": 0.9975, + "step": 925, + "time": 14.17 + }, + { + "epoch": 0.89, + "learning_rate": "1.9817e-04", + "loss": 0.9509, + "slid_loss": 0.9968, + "step": 926, + "time": 14.36 + }, + { + "epoch": 0.89, + "learning_rate": "1.9816e-04", + "loss": 0.966, + "slid_loss": 0.9967, + "step": 927, + "time": 13.96 + }, + { + "epoch": 0.89, + "learning_rate": "1.9815e-04", + "loss": 0.9077, + "slid_loss": 0.9948, + "step": 928, + "time": 13.91 + }, + { + "epoch": 0.89, + "learning_rate": "1.9814e-04", + "loss": 1.0008, + "slid_loss": 0.9947, + "step": 929, + "time": 12.87 + }, + { + "epoch": 0.89, + "learning_rate": "1.9813e-04", + "loss": 0.9972, + "slid_loss": 0.9947, + "step": 930, + "time": 13.14 + }, + { + "epoch": 0.89, + "learning_rate": "1.9812e-04", + "loss": 1.0144, + "slid_loss": 0.994, + "step": 931, + "time": 13.88 + }, + { + "epoch": 0.9, + "learning_rate": "1.9811e-04", + "loss": 1.0171, + "slid_loss": 0.9943, + "step": 932, + "time": 12.88 + }, + { + "epoch": 0.9, + "learning_rate": "1.9810e-04", + "loss": 0.941, + 
"slid_loss": 0.9938, + "step": 933, + "time": 13.12 + }, + { + "epoch": 0.9, + "learning_rate": "1.9809e-04", + "loss": 1.0739, + "slid_loss": 0.9944, + "step": 934, + "time": 14.38 + }, + { + "epoch": 0.9, + "learning_rate": "1.9808e-04", + "loss": 0.8783, + "slid_loss": 0.9922, + "step": 935, + "time": 11.91 + }, + { + "epoch": 0.9, + "learning_rate": "1.9808e-04", + "loss": 0.9827, + "slid_loss": 0.992, + "step": 936, + "time": 14.06 + }, + { + "epoch": 0.9, + "learning_rate": "1.9807e-04", + "loss": 0.973, + "slid_loss": 0.992, + "step": 937, + "time": 13.76 + }, + { + "epoch": 0.9, + "learning_rate": "1.9806e-04", + "loss": 0.9081, + "slid_loss": 0.991, + "step": 938, + "time": 13.2 + }, + { + "epoch": 0.9, + "learning_rate": "1.9805e-04", + "loss": 1.0273, + "slid_loss": 0.992, + "step": 939, + "time": 13.95 + }, + { + "epoch": 0.9, + "learning_rate": "1.9804e-04", + "loss": 1.08, + "slid_loss": 0.9917, + "step": 940, + "time": 12.28 + }, + { + "epoch": 0.9, + "learning_rate": "1.9803e-04", + "loss": 0.9241, + "slid_loss": 0.9909, + "step": 941, + "time": 11.46 + }, + { + "epoch": 0.9, + "learning_rate": "1.9802e-04", + "loss": 1.0113, + "slid_loss": 0.9904, + "step": 942, + "time": 13.27 + }, + { + "epoch": 0.91, + "learning_rate": "1.9801e-04", + "loss": 0.9796, + "slid_loss": 0.9896, + "step": 943, + "time": 14.52 + }, + { + "epoch": 0.91, + "learning_rate": "1.9800e-04", + "loss": 0.9488, + "slid_loss": 0.9892, + "step": 944, + "time": 13.68 + }, + { + "epoch": 0.91, + "learning_rate": "1.9799e-04", + "loss": 0.924, + "slid_loss": 0.9877, + "step": 945, + "time": 12.8 + }, + { + "epoch": 0.91, + "learning_rate": "1.9798e-04", + "loss": 1.1244, + "slid_loss": 0.9877, + "step": 946, + "time": 13.64 + }, + { + "epoch": 0.91, + "learning_rate": "1.9797e-04", + "loss": 1.0146, + "slid_loss": 0.9877, + "step": 947, + "time": 13.15 + }, + { + "epoch": 0.91, + "learning_rate": "1.9796e-04", + "loss": 0.9195, + "slid_loss": 0.9864, + "step": 948, + "time": 13.7 + 
}, + { + "epoch": 0.91, + "learning_rate": "1.9795e-04", + "loss": 0.9465, + "slid_loss": 0.986, + "step": 949, + "time": 14.5 + }, + { + "epoch": 0.91, + "learning_rate": "1.9794e-04", + "loss": 0.8867, + "slid_loss": 0.9844, + "step": 950, + "time": 14.26 + }, + { + "epoch": 0.91, + "learning_rate": "1.9793e-04", + "loss": 1.002, + "slid_loss": 0.9851, + "step": 951, + "time": 13.3 + }, + { + "epoch": 0.91, + "learning_rate": "1.9793e-04", + "loss": 0.9565, + "slid_loss": 0.9834, + "step": 952, + "time": 12.94 + }, + { + "epoch": 0.92, + "learning_rate": "1.9792e-04", + "loss": 0.9462, + "slid_loss": 0.9831, + "step": 953, + "time": 12.84 + }, + { + "epoch": 0.92, + "learning_rate": "1.9791e-04", + "loss": 0.996, + "slid_loss": 0.9837, + "step": 954, + "time": 13.41 + }, + { + "epoch": 0.92, + "learning_rate": "1.9790e-04", + "loss": 0.9394, + "slid_loss": 0.9828, + "step": 955, + "time": 13.21 + }, + { + "epoch": 0.92, + "learning_rate": "1.9789e-04", + "loss": 0.9914, + "slid_loss": 0.9832, + "step": 956, + "time": 13.42 + }, + { + "epoch": 0.92, + "learning_rate": "1.9788e-04", + "loss": 0.843, + "slid_loss": 0.9822, + "step": 957, + "time": 12.82 + }, + { + "epoch": 0.92, + "learning_rate": "1.9787e-04", + "loss": 0.9026, + "slid_loss": 0.9819, + "step": 958, + "time": 13.03 + }, + { + "epoch": 0.92, + "learning_rate": "1.9786e-04", + "loss": 0.9919, + "slid_loss": 0.9822, + "step": 959, + "time": 12.68 + }, + { + "epoch": 0.92, + "learning_rate": "1.9785e-04", + "loss": 1.0591, + "slid_loss": 0.9823, + "step": 960, + "time": 13.94 + }, + { + "epoch": 0.92, + "learning_rate": "1.9784e-04", + "loss": 0.9357, + "slid_loss": 0.982, + "step": 961, + "time": 13.3 + }, + { + "epoch": 0.92, + "learning_rate": "1.9783e-04", + "loss": 0.9297, + "slid_loss": 0.9819, + "step": 962, + "time": 12.21 + }, + { + "epoch": 0.93, + "learning_rate": "1.9782e-04", + "loss": 1.0105, + "slid_loss": 0.9829, + "step": 963, + "time": 12.44 + }, + { + "epoch": 0.93, + "learning_rate": 
"1.9781e-04", + "loss": 1.0013, + "slid_loss": 0.983, + "step": 964, + "time": 14.48 + }, + { + "epoch": 0.93, + "learning_rate": "1.9780e-04", + "loss": 0.967, + "slid_loss": 0.9822, + "step": 965, + "time": 13.09 + }, + { + "epoch": 0.93, + "learning_rate": "1.9779e-04", + "loss": 1.0322, + "slid_loss": 0.9823, + "step": 966, + "time": 13.75 + }, + { + "epoch": 0.93, + "learning_rate": "1.9778e-04", + "loss": 0.9993, + "slid_loss": 0.9836, + "step": 967, + "time": 13.18 + }, + { + "epoch": 0.93, + "learning_rate": "1.9777e-04", + "loss": 0.9493, + "slid_loss": 0.9823, + "step": 968, + "time": 13.09 + }, + { + "epoch": 0.93, + "learning_rate": "1.9776e-04", + "loss": 0.8998, + "slid_loss": 0.9818, + "step": 969, + "time": 13.85 + }, + { + "epoch": 0.93, + "learning_rate": "1.9775e-04", + "loss": 0.9338, + "slid_loss": 0.9807, + "step": 970, + "time": 13.28 + }, + { + "epoch": 0.93, + "learning_rate": "1.9774e-04", + "loss": 1.0208, + "slid_loss": 0.9816, + "step": 971, + "time": 13.8 + }, + { + "epoch": 0.93, + "learning_rate": "1.9773e-04", + "loss": 0.956, + "slid_loss": 0.9814, + "step": 972, + "time": 13.25 + }, + { + "epoch": 0.93, + "learning_rate": "1.9772e-04", + "loss": 0.9816, + "slid_loss": 0.9818, + "step": 973, + "time": 13.86 + }, + { + "epoch": 0.94, + "learning_rate": "1.9771e-04", + "loss": 0.921, + "slid_loss": 0.9806, + "step": 974, + "time": 12.45 + }, + { + "epoch": 0.94, + "learning_rate": "1.9770e-04", + "loss": 0.9402, + "slid_loss": 0.9812, + "step": 975, + "time": 14.01 + }, + { + "epoch": 0.94, + "learning_rate": "1.9769e-04", + "loss": 0.8464, + "slid_loss": 0.9794, + "step": 976, + "time": 13.15 + }, + { + "epoch": 0.94, + "learning_rate": "1.9768e-04", + "loss": 0.9949, + "slid_loss": 0.9792, + "step": 977, + "time": 13.7 + }, + { + "epoch": 0.94, + "learning_rate": "1.9767e-04", + "loss": 0.9473, + "slid_loss": 0.9793, + "step": 978, + "time": 11.32 + }, + { + "epoch": 0.94, + "learning_rate": "1.9766e-04", + "loss": 1.0231, + 
"slid_loss": 0.9782, + "step": 979, + "time": 13.93 + }, + { + "epoch": 0.94, + "learning_rate": "1.9765e-04", + "loss": 0.9871, + "slid_loss": 0.9783, + "step": 980, + "time": 13.42 + }, + { + "epoch": 0.94, + "learning_rate": "1.9764e-04", + "loss": 1.0541, + "slid_loss": 0.9785, + "step": 981, + "time": 13.23 + }, + { + "epoch": 0.94, + "learning_rate": "1.9763e-04", + "loss": 0.916, + "slid_loss": 0.9777, + "step": 982, + "time": 12.0 + }, + { + "epoch": 0.94, + "learning_rate": "1.9762e-04", + "loss": 0.937, + "slid_loss": 0.9775, + "step": 983, + "time": 12.75 + }, + { + "epoch": 0.95, + "learning_rate": "1.9761e-04", + "loss": 0.9628, + "slid_loss": 0.9764, + "step": 984, + "time": 13.37 + }, + { + "epoch": 0.95, + "learning_rate": "1.9760e-04", + "loss": 1.1171, + "slid_loss": 0.9775, + "step": 985, + "time": 13.55 + }, + { + "epoch": 0.95, + "learning_rate": "1.9759e-04", + "loss": 1.032, + "slid_loss": 0.977, + "step": 986, + "time": 12.24 + }, + { + "epoch": 0.95, + "learning_rate": "1.9758e-04", + "loss": 0.97, + "slid_loss": 0.9763, + "step": 987, + "time": 14.3 + }, + { + "epoch": 0.95, + "learning_rate": "1.9757e-04", + "loss": 0.9148, + "slid_loss": 0.976, + "step": 988, + "time": 11.34 + }, + { + "epoch": 0.95, + "learning_rate": "1.9756e-04", + "loss": 0.9893, + "slid_loss": 0.9762, + "step": 989, + "time": 13.78 + }, + { + "epoch": 0.95, + "learning_rate": "1.9755e-04", + "loss": 1.0352, + "slid_loss": 0.9768, + "step": 990, + "time": 13.91 + }, + { + "epoch": 0.95, + "learning_rate": "1.9754e-04", + "loss": 1.0895, + "slid_loss": 0.9769, + "step": 991, + "time": 13.63 + }, + { + "epoch": 0.95, + "learning_rate": "1.9753e-04", + "loss": 1.0234, + "slid_loss": 0.9772, + "step": 992, + "time": 13.62 + }, + { + "epoch": 0.95, + "learning_rate": "1.9752e-04", + "loss": 0.8328, + "slid_loss": 0.9749, + "step": 993, + "time": 13.77 + }, + { + "epoch": 0.95, + "learning_rate": "1.9750e-04", + "loss": 0.9009, + "slid_loss": 0.9742, + "step": 994, + 
"time": 11.7 + }, + { + "epoch": 0.96, + "learning_rate": "1.9749e-04", + "loss": 0.9438, + "slid_loss": 0.9745, + "step": 995, + "time": 13.54 + }, + { + "epoch": 0.96, + "learning_rate": "1.9748e-04", + "loss": 1.0395, + "slid_loss": 0.9756, + "step": 996, + "time": 12.84 + }, + { + "epoch": 0.96, + "learning_rate": "1.9747e-04", + "loss": 0.9603, + "slid_loss": 0.9755, + "step": 997, + "time": 13.33 + }, + { + "epoch": 0.96, + "learning_rate": "1.9746e-04", + "loss": 1.0074, + "slid_loss": 0.9769, + "step": 998, + "time": 12.81 + }, + { + "epoch": 0.96, + "learning_rate": "1.9745e-04", + "loss": 1.0163, + "slid_loss": 0.9758, + "step": 999, + "time": 10.79 + }, + { + "epoch": 0.96, + "learning_rate": "1.9744e-04", + "loss": 0.9061, + "slid_loss": 0.9748, + "step": 1000, + "time": 13.43 + }, + { + "epoch": 0.96, + "learning_rate": "1.9743e-04", + "loss": 0.92, + "slid_loss": 0.9746, + "step": 1001, + "time": 12.93 + }, + { + "epoch": 0.96, + "learning_rate": "1.9742e-04", + "loss": 1.0226, + "slid_loss": 0.9757, + "step": 1002, + "time": 13.41 + }, + { + "epoch": 0.96, + "learning_rate": "1.9741e-04", + "loss": 1.0246, + "slid_loss": 0.976, + "step": 1003, + "time": 11.67 + }, + { + "epoch": 0.96, + "learning_rate": "1.9740e-04", + "loss": 0.9364, + "slid_loss": 0.9757, + "step": 1004, + "time": 11.66 + }, + { + "epoch": 0.97, + "learning_rate": "1.9739e-04", + "loss": 0.997, + "slid_loss": 0.977, + "step": 1005, + "time": 13.38 + }, + { + "epoch": 0.97, + "learning_rate": "1.9738e-04", + "loss": 0.9579, + "slid_loss": 0.9764, + "step": 1006, + "time": 13.57 + }, + { + "epoch": 0.97, + "learning_rate": "1.9737e-04", + "loss": 0.932, + "slid_loss": 0.9766, + "step": 1007, + "time": 13.15 + }, + { + "epoch": 0.97, + "learning_rate": "1.9736e-04", + "loss": 1.0112, + "slid_loss": 0.9774, + "step": 1008, + "time": 13.82 + }, + { + "epoch": 0.97, + "learning_rate": "1.9735e-04", + "loss": 1.0112, + "slid_loss": 0.9779, + "step": 1009, + "time": 13.48 + }, + { + 
"epoch": 0.97, + "learning_rate": "1.9733e-04", + "loss": 1.0069, + "slid_loss": 0.9785, + "step": 1010, + "time": 13.25 + }, + { + "epoch": 0.97, + "learning_rate": "1.9732e-04", + "loss": 0.9538, + "slid_loss": 0.9774, + "step": 1011, + "time": 14.06 + }, + { + "epoch": 0.97, + "learning_rate": "1.9731e-04", + "loss": 0.8669, + "slid_loss": 0.9759, + "step": 1012, + "time": 13.4 + }, + { + "epoch": 0.97, + "learning_rate": "1.9730e-04", + "loss": 0.9567, + "slid_loss": 0.9761, + "step": 1013, + "time": 12.22 + }, + { + "epoch": 0.97, + "learning_rate": "1.9729e-04", + "loss": 1.0478, + "slid_loss": 0.9762, + "step": 1014, + "time": 11.7 + }, + { + "epoch": 0.98, + "learning_rate": "1.9728e-04", + "loss": 0.9359, + "slid_loss": 0.9747, + "step": 1015, + "time": 13.29 + }, + { + "epoch": 0.98, + "learning_rate": "1.9727e-04", + "loss": 0.9848, + "slid_loss": 0.9744, + "step": 1016, + "time": 10.49 + }, + { + "epoch": 0.98, + "learning_rate": "1.9726e-04", + "loss": 0.9545, + "slid_loss": 0.9741, + "step": 1017, + "time": 13.67 + }, + { + "epoch": 0.98, + "learning_rate": "1.9725e-04", + "loss": 1.0427, + "slid_loss": 0.974, + "step": 1018, + "time": 12.23 + }, + { + "epoch": 0.98, + "learning_rate": "1.9724e-04", + "loss": 1.0048, + "slid_loss": 0.9735, + "step": 1019, + "time": 14.07 + }, + { + "epoch": 0.98, + "learning_rate": "1.9723e-04", + "loss": 0.9756, + "slid_loss": 0.9737, + "step": 1020, + "time": 11.78 + }, + { + "epoch": 0.98, + "learning_rate": "1.9721e-04", + "loss": 1.0282, + "slid_loss": 0.9738, + "step": 1021, + "time": 12.91 + }, + { + "epoch": 0.98, + "learning_rate": "1.9720e-04", + "loss": 1.1092, + "slid_loss": 0.9757, + "step": 1022, + "time": 13.48 + }, + { + "epoch": 0.98, + "learning_rate": "1.9719e-04", + "loss": 0.998, + "slid_loss": 0.9769, + "step": 1023, + "time": 14.11 + }, + { + "epoch": 0.98, + "learning_rate": "1.9718e-04", + "loss": 0.9313, + "slid_loss": 0.9752, + "step": 1024, + "time": 13.15 + }, + { + "epoch": 0.98, + 
"learning_rate": "1.9717e-04", + "loss": 0.899, + "slid_loss": 0.9746, + "step": 1025, + "time": 13.8 + }, + { + "epoch": 0.99, + "learning_rate": "1.9716e-04", + "loss": 0.9183, + "slid_loss": 0.9743, + "step": 1026, + "time": 13.37 + }, + { + "epoch": 0.99, + "learning_rate": "1.9715e-04", + "loss": 1.0916, + "slid_loss": 0.9755, + "step": 1027, + "time": 13.83 + }, + { + "epoch": 0.99, + "learning_rate": "1.9714e-04", + "loss": 1.0198, + "slid_loss": 0.9767, + "step": 1028, + "time": 13.39 + }, + { + "epoch": 0.99, + "learning_rate": "1.9713e-04", + "loss": 0.8994, + "slid_loss": 0.9756, + "step": 1029, + "time": 12.52 + }, + { + "epoch": 0.99, + "learning_rate": "1.9711e-04", + "loss": 1.0132, + "slid_loss": 0.9758, + "step": 1030, + "time": 12.58 + }, + { + "epoch": 0.99, + "learning_rate": "1.9710e-04", + "loss": 1.0797, + "slid_loss": 0.9765, + "step": 1031, + "time": 13.81 + }, + { + "epoch": 0.99, + "learning_rate": "1.9709e-04", + "loss": 0.9583, + "slid_loss": 0.9759, + "step": 1032, + "time": 11.87 + }, + { + "epoch": 0.99, + "learning_rate": "1.9708e-04", + "loss": 0.9373, + "slid_loss": 0.9758, + "step": 1033, + "time": 13.53 + }, + { + "epoch": 0.99, + "learning_rate": "1.9707e-04", + "loss": 0.95, + "slid_loss": 0.9746, + "step": 1034, + "time": 13.42 + }, + { + "epoch": 0.99, + "learning_rate": "1.9706e-04", + "loss": 0.9967, + "slid_loss": 0.9758, + "step": 1035, + "time": 12.74 + }, + { + "epoch": 1.0, + "learning_rate": "1.9705e-04", + "loss": 1.0089, + "slid_loss": 0.976, + "step": 1036, + "time": 12.37 + }, + { + "epoch": 1.0, + "learning_rate": "1.9704e-04", + "loss": 1.0543, + "slid_loss": 0.9768, + "step": 1037, + "time": 13.47 + }, + { + "epoch": 1.0, + "learning_rate": "1.9702e-04", + "loss": 0.9265, + "slid_loss": 0.977, + "step": 1038, + "time": 13.38 + }, + { + "epoch": 1.0, + "learning_rate": "1.9701e-04", + "loss": 1.0738, + "slid_loss": 0.9775, + "step": 1039, + "time": 12.14 + }, + { + "epoch": 1.0, + "learning_rate": "1.9700e-04", 
+ "loss": 1.0418, + "slid_loss": 0.9771, + "step": 1040, + "time": 13.68 + }, + { + "epoch": 1.0, + "learning_rate": "1.9699e-04", + "loss": 0.8768, + "slid_loss": 0.9766, + "step": 1041, + "time": 14.16 + }, + { + "epoch": 1.0, + "learning_rate": "1.9698e-04", + "loss": 0.9259, + "slid_loss": 0.9758, + "step": 1042, + "time": 174.68 + }, + { + "epoch": 1.0, + "learning_rate": "1.9697e-04", + "loss": 0.9491, + "slid_loss": 0.9755, + "step": 1043, + "time": 11.21 + }, + { + "epoch": 1.0, + "learning_rate": "1.9696e-04", + "loss": 0.9508, + "slid_loss": 0.9755, + "step": 1044, + "time": 11.52 + }, + { + "epoch": 1.0, + "learning_rate": "1.9694e-04", + "loss": 0.9902, + "slid_loss": 0.9762, + "step": 1045, + "time": 13.92 + }, + { + "epoch": 1.0, + "learning_rate": "1.9693e-04", + "loss": 0.9966, + "slid_loss": 0.9749, + "step": 1046, + "time": 13.6 + }, + { + "epoch": 1.01, + "learning_rate": "1.9692e-04", + "loss": 0.9123, + "slid_loss": 0.9739, + "step": 1047, + "time": 11.33 + }, + { + "epoch": 1.01, + "learning_rate": "1.9691e-04", + "loss": 0.9404, + "slid_loss": 0.9741, + "step": 1048, + "time": 11.54 + }, + { + "epoch": 1.01, + "learning_rate": "1.9690e-04", + "loss": 0.8762, + "slid_loss": 0.9734, + "step": 1049, + "time": 11.54 + }, + { + "epoch": 1.01, + "learning_rate": "1.9689e-04", + "loss": 0.9187, + "slid_loss": 0.9737, + "step": 1050, + "time": 14.14 + }, + { + "epoch": 1.01, + "learning_rate": "1.9687e-04", + "loss": 0.8716, + "slid_loss": 0.9724, + "step": 1051, + "time": 13.26 + }, + { + "epoch": 1.01, + "learning_rate": "1.9686e-04", + "loss": 0.9457, + "slid_loss": 0.9723, + "step": 1052, + "time": 12.89 + }, + { + "epoch": 1.01, + "learning_rate": "1.9685e-04", + "loss": 0.9017, + "slid_loss": 0.9718, + "step": 1053, + "time": 13.47 + }, + { + "epoch": 1.01, + "learning_rate": "1.9684e-04", + "loss": 0.9662, + "slid_loss": 0.9715, + "step": 1054, + "time": 14.36 + }, + { + "epoch": 1.01, + "learning_rate": "1.9683e-04", + "loss": 0.9353, + 
"slid_loss": 0.9715, + "step": 1055, + "time": 12.54 + }, + { + "epoch": 1.01, + "learning_rate": "1.9682e-04", + "loss": 0.9734, + "slid_loss": 0.9713, + "step": 1056, + "time": 12.47 + }, + { + "epoch": 1.02, + "learning_rate": "1.9680e-04", + "loss": 1.0055, + "slid_loss": 0.9729, + "step": 1057, + "time": 14.32 + }, + { + "epoch": 1.02, + "learning_rate": "1.9679e-04", + "loss": 0.8676, + "slid_loss": 0.9726, + "step": 1058, + "time": 13.74 + }, + { + "epoch": 1.02, + "learning_rate": "1.9678e-04", + "loss": 0.9367, + "slid_loss": 0.972, + "step": 1059, + "time": 13.19 + }, + { + "epoch": 1.02, + "learning_rate": "1.9677e-04", + "loss": 1.0135, + "slid_loss": 0.9716, + "step": 1060, + "time": 13.2 + }, + { + "epoch": 1.02, + "learning_rate": "1.9676e-04", + "loss": 0.9528, + "slid_loss": 0.9717, + "step": 1061, + "time": 11.01 + }, + { + "epoch": 1.02, + "learning_rate": "1.9674e-04", + "loss": 0.892, + "slid_loss": 0.9714, + "step": 1062, + "time": 13.42 + }, + { + "epoch": 1.02, + "learning_rate": "1.9673e-04", + "loss": 0.9255, + "slid_loss": 0.9705, + "step": 1063, + "time": 11.41 + }, + { + "epoch": 1.02, + "learning_rate": "1.9672e-04", + "loss": 0.9277, + "slid_loss": 0.9698, + "step": 1064, + "time": 14.23 + }, + { + "epoch": 1.02, + "learning_rate": "1.9671e-04", + "loss": 1.1172, + "slid_loss": 0.9713, + "step": 1065, + "time": 13.51 + }, + { + "epoch": 1.02, + "learning_rate": "1.9670e-04", + "loss": 0.9519, + "slid_loss": 0.9705, + "step": 1066, + "time": 13.71 + }, + { + "epoch": 1.02, + "learning_rate": "1.9668e-04", + "loss": 1.0635, + "slid_loss": 0.9711, + "step": 1067, + "time": 14.01 + }, + { + "epoch": 1.03, + "learning_rate": "1.9667e-04", + "loss": 1.0054, + "slid_loss": 0.9717, + "step": 1068, + "time": 12.75 + }, + { + "epoch": 1.03, + "learning_rate": "1.9666e-04", + "loss": 1.0559, + "slid_loss": 0.9732, + "step": 1069, + "time": 13.33 + }, + { + "epoch": 1.03, + "learning_rate": "1.9665e-04", + "loss": 1.0143, + "slid_loss": 0.9741, + 
"step": 1070, + "time": 14.09 + }, + { + "epoch": 1.03, + "learning_rate": "1.9664e-04", + "loss": 1.0064, + "slid_loss": 0.9739, + "step": 1071, + "time": 12.53 + }, + { + "epoch": 1.03, + "learning_rate": "1.9662e-04", + "loss": 1.0137, + "slid_loss": 0.9745, + "step": 1072, + "time": 13.72 + }, + { + "epoch": 1.03, + "learning_rate": "1.9661e-04", + "loss": 0.9903, + "slid_loss": 0.9746, + "step": 1073, + "time": 13.44 + }, + { + "epoch": 1.03, + "learning_rate": "1.9660e-04", + "loss": 0.9629, + "slid_loss": 0.975, + "step": 1074, + "time": 13.63 + }, + { + "epoch": 1.03, + "learning_rate": "1.9659e-04", + "loss": 0.9543, + "slid_loss": 0.9751, + "step": 1075, + "time": 12.97 + }, + { + "epoch": 1.03, + "learning_rate": "1.9658e-04", + "loss": 0.9168, + "slid_loss": 0.9758, + "step": 1076, + "time": 11.62 + }, + { + "epoch": 1.03, + "learning_rate": "1.9656e-04", + "loss": 0.9888, + "slid_loss": 0.9758, + "step": 1077, + "time": 13.63 + }, + { + "epoch": 1.04, + "learning_rate": "1.9655e-04", + "loss": 0.8354, + "slid_loss": 0.9747, + "step": 1078, + "time": 13.72 + }, + { + "epoch": 1.04, + "learning_rate": "1.9654e-04", + "loss": 0.9042, + "slid_loss": 0.9735, + "step": 1079, + "time": 12.41 + }, + { + "epoch": 1.04, + "learning_rate": "1.9653e-04", + "loss": 1.0398, + "slid_loss": 0.974, + "step": 1080, + "time": 13.93 + }, + { + "epoch": 1.04, + "learning_rate": "1.9651e-04", + "loss": 1.0013, + "slid_loss": 0.9735, + "step": 1081, + "time": 13.17 + }, + { + "epoch": 1.04, + "learning_rate": "1.9650e-04", + "loss": 1.0264, + "slid_loss": 0.9746, + "step": 1082, + "time": 13.16 + }, + { + "epoch": 1.04, + "learning_rate": "1.9649e-04", + "loss": 0.9633, + "slid_loss": 0.9748, + "step": 1083, + "time": 13.34 + }, + { + "epoch": 1.04, + "learning_rate": "1.9648e-04", + "loss": 1.0064, + "slid_loss": 0.9753, + "step": 1084, + "time": 12.8 + }, + { + "epoch": 1.04, + "learning_rate": "1.9647e-04", + "loss": 0.9201, + "slid_loss": 0.9733, + "step": 1085, + 
"time": 13.65 + }, + { + "epoch": 1.04, + "learning_rate": "1.9645e-04", + "loss": 0.948, + "slid_loss": 0.9725, + "step": 1086, + "time": 14.32 + }, + { + "epoch": 1.04, + "learning_rate": "1.9644e-04", + "loss": 0.9234, + "slid_loss": 0.972, + "step": 1087, + "time": 12.87 + }, + { + "epoch": 1.05, + "learning_rate": "1.9643e-04", + "loss": 1.0118, + "slid_loss": 0.973, + "step": 1088, + "time": 12.84 + }, + { + "epoch": 1.05, + "learning_rate": "1.9642e-04", + "loss": 0.8761, + "slid_loss": 0.9718, + "step": 1089, + "time": 12.36 + }, + { + "epoch": 1.05, + "learning_rate": "1.9640e-04", + "loss": 1.0149, + "slid_loss": 0.9716, + "step": 1090, + "time": 13.18 + }, + { + "epoch": 1.05, + "learning_rate": "1.9639e-04", + "loss": 1.0226, + "slid_loss": 0.971, + "step": 1091, + "time": 13.81 + }, + { + "epoch": 1.05, + "learning_rate": "1.9638e-04", + "loss": 0.9232, + "slid_loss": 0.97, + "step": 1092, + "time": 14.08 + }, + { + "epoch": 1.05, + "learning_rate": "1.9637e-04", + "loss": 0.9565, + "slid_loss": 0.9712, + "step": 1093, + "time": 14.31 + }, + { + "epoch": 1.05, + "learning_rate": "1.9635e-04", + "loss": 0.9235, + "slid_loss": 0.9714, + "step": 1094, + "time": 13.33 + }, + { + "epoch": 1.05, + "learning_rate": "1.9634e-04", + "loss": 0.9145, + "slid_loss": 0.9711, + "step": 1095, + "time": 13.47 + }, + { + "epoch": 1.05, + "learning_rate": "1.9633e-04", + "loss": 0.9809, + "slid_loss": 0.9705, + "step": 1096, + "time": 11.25 + }, + { + "epoch": 1.05, + "learning_rate": "1.9631e-04", + "loss": 0.8829, + "slid_loss": 0.9698, + "step": 1097, + "time": 13.15 + }, + { + "epoch": 1.05, + "learning_rate": "1.9630e-04", + "loss": 0.8922, + "slid_loss": 0.9686, + "step": 1098, + "time": 13.14 + }, + { + "epoch": 1.06, + "learning_rate": "1.9629e-04", + "loss": 0.8461, + "slid_loss": 0.9669, + "step": 1099, + "time": 11.99 + }, + { + "epoch": 1.06, + "learning_rate": "1.9628e-04", + "loss": 0.8706, + "slid_loss": 0.9666, + "step": 1100, + "time": 13.85 + }, + { + 
"epoch": 1.06, + "learning_rate": "1.9626e-04", + "loss": 0.8988, + "slid_loss": 0.9663, + "step": 1101, + "time": 13.05 + }, + { + "epoch": 1.06, + "learning_rate": "1.9625e-04", + "loss": 0.8927, + "slid_loss": 0.965, + "step": 1102, + "time": 11.57 + }, + { + "epoch": 1.06, + "learning_rate": "1.9624e-04", + "loss": 0.8794, + "slid_loss": 0.9636, + "step": 1103, + "time": 13.03 + }, + { + "epoch": 1.06, + "learning_rate": "1.9623e-04", + "loss": 0.96, + "slid_loss": 0.9638, + "step": 1104, + "time": 13.4 + }, + { + "epoch": 1.06, + "learning_rate": "1.9621e-04", + "loss": 1.049, + "slid_loss": 0.9644, + "step": 1105, + "time": 13.93 + }, + { + "epoch": 1.06, + "learning_rate": "1.9620e-04", + "loss": 1.0336, + "slid_loss": 0.9651, + "step": 1106, + "time": 11.59 + }, + { + "epoch": 1.06, + "learning_rate": "1.9619e-04", + "loss": 1.0636, + "slid_loss": 0.9664, + "step": 1107, + "time": 13.73 + }, + { + "epoch": 1.06, + "learning_rate": "1.9617e-04", + "loss": 0.8595, + "slid_loss": 0.9649, + "step": 1108, + "time": 13.4 + }, + { + "epoch": 1.07, + "learning_rate": "1.9616e-04", + "loss": 0.9778, + "slid_loss": 0.9646, + "step": 1109, + "time": 13.26 + }, + { + "epoch": 1.07, + "learning_rate": "1.9615e-04", + "loss": 0.9504, + "slid_loss": 0.964, + "step": 1110, + "time": 13.24 + }, + { + "epoch": 1.07, + "learning_rate": "1.9614e-04", + "loss": 0.9166, + "slid_loss": 0.9636, + "step": 1111, + "time": 13.84 + }, + { + "epoch": 1.07, + "learning_rate": "1.9612e-04", + "loss": 1.0352, + "slid_loss": 0.9653, + "step": 1112, + "time": 13.81 + }, + { + "epoch": 1.07, + "learning_rate": "1.9611e-04", + "loss": 0.9281, + "slid_loss": 0.965, + "step": 1113, + "time": 13.66 + }, + { + "epoch": 1.07, + "learning_rate": "1.9610e-04", + "loss": 0.9452, + "slid_loss": 0.964, + "step": 1114, + "time": 13.72 + }, + { + "epoch": 1.07, + "learning_rate": "1.9608e-04", + "loss": 1.0631, + "slid_loss": 0.9653, + "step": 1115, + "time": 10.74 + }, + { + "epoch": 1.07, + 
"learning_rate": "1.9607e-04", + "loss": 0.9703, + "slid_loss": 0.9651, + "step": 1116, + "time": 13.83 + }, + { + "epoch": 1.07, + "learning_rate": "1.9606e-04", + "loss": 0.9034, + "slid_loss": 0.9646, + "step": 1117, + "time": 13.09 + }, + { + "epoch": 1.07, + "learning_rate": "1.9605e-04", + "loss": 0.9517, + "slid_loss": 0.9637, + "step": 1118, + "time": 12.85 + }, + { + "epoch": 1.07, + "learning_rate": "1.9603e-04", + "loss": 0.9328, + "slid_loss": 0.963, + "step": 1119, + "time": 13.23 + }, + { + "epoch": 1.08, + "learning_rate": "1.9602e-04", + "loss": 0.9799, + "slid_loss": 0.963, + "step": 1120, + "time": 13.81 + }, + { + "epoch": 1.08, + "learning_rate": "1.9601e-04", + "loss": 0.8996, + "slid_loss": 0.9618, + "step": 1121, + "time": 15.25 + }, + { + "epoch": 1.08, + "learning_rate": "1.9599e-04", + "loss": 0.856, + "slid_loss": 0.9592, + "step": 1122, + "time": 13.99 + }, + { + "epoch": 1.08, + "learning_rate": "1.9598e-04", + "loss": 1.0382, + "slid_loss": 0.9596, + "step": 1123, + "time": 11.43 + }, + { + "epoch": 1.08, + "learning_rate": "1.9597e-04", + "loss": 1.0235, + "slid_loss": 0.9605, + "step": 1124, + "time": 13.33 + }, + { + "epoch": 1.08, + "learning_rate": "1.9595e-04", + "loss": 0.9985, + "slid_loss": 0.9615, + "step": 1125, + "time": 13.78 + }, + { + "epoch": 1.08, + "learning_rate": "1.9594e-04", + "loss": 1.0725, + "slid_loss": 0.9631, + "step": 1126, + "time": 12.37 + }, + { + "epoch": 1.08, + "learning_rate": "1.9593e-04", + "loss": 1.043, + "slid_loss": 0.9626, + "step": 1127, + "time": 13.7 + }, + { + "epoch": 1.08, + "learning_rate": "1.9591e-04", + "loss": 0.8429, + "slid_loss": 0.9608, + "step": 1128, + "time": 13.49 + }, + { + "epoch": 1.08, + "learning_rate": "1.9590e-04", + "loss": 1.0798, + "slid_loss": 0.9626, + "step": 1129, + "time": 13.68 + }, + { + "epoch": 1.09, + "learning_rate": "1.9589e-04", + "loss": 0.9815, + "slid_loss": 0.9623, + "step": 1130, + "time": 14.3 + }, + { + "epoch": 1.09, + "learning_rate": 
"1.9587e-04", + "loss": 0.9771, + "slid_loss": 0.9613, + "step": 1131, + "time": 13.41 + }, + { + "epoch": 1.09, + "learning_rate": "1.9586e-04", + "loss": 0.973, + "slid_loss": 0.9614, + "step": 1132, + "time": 12.12 + }, + { + "epoch": 1.09, + "learning_rate": "1.9585e-04", + "loss": 1.017, + "slid_loss": 0.9622, + "step": 1133, + "time": 12.1 + }, + { + "epoch": 1.09, + "learning_rate": "1.9583e-04", + "loss": 0.9913, + "slid_loss": 0.9626, + "step": 1134, + "time": 14.29 + }, + { + "epoch": 1.09, + "learning_rate": "1.9582e-04", + "loss": 0.992, + "slid_loss": 0.9626, + "step": 1135, + "time": 13.85 + }, + { + "epoch": 1.09, + "learning_rate": "1.9581e-04", + "loss": 0.8917, + "slid_loss": 0.9614, + "step": 1136, + "time": 13.53 + }, + { + "epoch": 1.09, + "learning_rate": "1.9579e-04", + "loss": 1.0444, + "slid_loss": 0.9613, + "step": 1137, + "time": 11.6 + }, + { + "epoch": 1.09, + "learning_rate": "1.9578e-04", + "loss": 0.992, + "slid_loss": 0.962, + "step": 1138, + "time": 13.44 + }, + { + "epoch": 1.09, + "learning_rate": "1.9577e-04", + "loss": 0.8975, + "slid_loss": 0.9602, + "step": 1139, + "time": 13.01 + }, + { + "epoch": 1.1, + "learning_rate": "1.9575e-04", + "loss": 0.9197, + "slid_loss": 0.959, + "step": 1140, + "time": 13.78 + }, + { + "epoch": 1.1, + "learning_rate": "1.9574e-04", + "loss": 0.9219, + "slid_loss": 0.9594, + "step": 1141, + "time": 13.64 + }, + { + "epoch": 1.1, + "learning_rate": "1.9573e-04", + "loss": 0.9325, + "slid_loss": 0.9595, + "step": 1142, + "time": 12.92 + }, + { + "epoch": 1.1, + "learning_rate": "1.9571e-04", + "loss": 0.9382, + "slid_loss": 0.9594, + "step": 1143, + "time": 12.89 + }, + { + "epoch": 1.1, + "learning_rate": "1.9570e-04", + "loss": 0.9687, + "slid_loss": 0.9596, + "step": 1144, + "time": 10.61 + }, + { + "epoch": 1.1, + "learning_rate": "1.9568e-04", + "loss": 0.9943, + "slid_loss": 0.9596, + "step": 1145, + "time": 13.34 + }, + { + "epoch": 1.1, + "learning_rate": "1.9567e-04", + "loss": 0.8942, + 
"slid_loss": 0.9586, + "step": 1146, + "time": 13.59 + }, + { + "epoch": 1.1, + "learning_rate": "1.9566e-04", + "loss": 0.8806, + "slid_loss": 0.9583, + "step": 1147, + "time": 13.98 + }, + { + "epoch": 1.1, + "learning_rate": "1.9564e-04", + "loss": 1.0034, + "slid_loss": 0.9589, + "step": 1148, + "time": 11.51 + }, + { + "epoch": 1.1, + "learning_rate": "1.9563e-04", + "loss": 0.8329, + "slid_loss": 0.9585, + "step": 1149, + "time": 11.54 + }, + { + "epoch": 1.1, + "learning_rate": "1.9562e-04", + "loss": 1.064, + "slid_loss": 0.9599, + "step": 1150, + "time": 12.67 + }, + { + "epoch": 1.11, + "learning_rate": "1.9560e-04", + "loss": 0.891, + "slid_loss": 0.9601, + "step": 1151, + "time": 13.32 + }, + { + "epoch": 1.11, + "learning_rate": "1.9559e-04", + "loss": 0.9212, + "slid_loss": 0.9599, + "step": 1152, + "time": 13.81 + }, + { + "epoch": 1.11, + "learning_rate": "1.9557e-04", + "loss": 0.9629, + "slid_loss": 0.9605, + "step": 1153, + "time": 11.28 + }, + { + "epoch": 1.11, + "learning_rate": "1.9556e-04", + "loss": 0.9159, + "slid_loss": 0.96, + "step": 1154, + "time": 13.57 + }, + { + "epoch": 1.11, + "learning_rate": "1.9555e-04", + "loss": 0.8557, + "slid_loss": 0.9592, + "step": 1155, + "time": 13.52 + }, + { + "epoch": 1.11, + "learning_rate": "1.9553e-04", + "loss": 0.9897, + "slid_loss": 0.9594, + "step": 1156, + "time": 13.73 + }, + { + "epoch": 1.11, + "learning_rate": "1.9552e-04", + "loss": 0.9237, + "slid_loss": 0.9585, + "step": 1157, + "time": 12.28 + }, + { + "epoch": 1.11, + "learning_rate": "1.9551e-04", + "loss": 1.0264, + "slid_loss": 0.9601, + "step": 1158, + "time": 14.53 + }, + { + "epoch": 1.11, + "learning_rate": "1.9549e-04", + "loss": 0.9369, + "slid_loss": 0.9601, + "step": 1159, + "time": 13.57 + }, + { + "epoch": 1.11, + "learning_rate": "1.9548e-04", + "loss": 0.8212, + "slid_loss": 0.9582, + "step": 1160, + "time": 11.96 + }, + { + "epoch": 1.12, + "learning_rate": "1.9546e-04", + "loss": 1.0195, + "slid_loss": 0.9589, + 
"step": 1161, + "time": 13.29 + }, + { + "epoch": 1.12, + "learning_rate": "1.9545e-04", + "loss": 0.9329, + "slid_loss": 0.9593, + "step": 1162, + "time": 13.68 + }, + { + "epoch": 1.12, + "learning_rate": "1.9544e-04", + "loss": 1.0429, + "slid_loss": 0.9605, + "step": 1163, + "time": 11.42 + }, + { + "epoch": 1.12, + "learning_rate": "1.9542e-04", + "loss": 0.8298, + "slid_loss": 0.9595, + "step": 1164, + "time": 11.68 + }, + { + "epoch": 1.12, + "learning_rate": "1.9541e-04", + "loss": 0.9673, + "slid_loss": 0.958, + "step": 1165, + "time": 11.66 + }, + { + "epoch": 1.12, + "learning_rate": "1.9539e-04", + "loss": 1.0574, + "slid_loss": 0.959, + "step": 1166, + "time": 13.56 + }, + { + "epoch": 1.12, + "learning_rate": "1.9538e-04", + "loss": 0.9713, + "slid_loss": 0.9581, + "step": 1167, + "time": 13.36 + }, + { + "epoch": 1.12, + "learning_rate": "1.9537e-04", + "loss": 0.837, + "slid_loss": 0.9564, + "step": 1168, + "time": 11.18 + }, + { + "epoch": 1.12, + "learning_rate": "1.9535e-04", + "loss": 0.9485, + "slid_loss": 0.9554, + "step": 1169, + "time": 13.92 + }, + { + "epoch": 1.12, + "learning_rate": "1.9534e-04", + "loss": 1.0021, + "slid_loss": 0.9552, + "step": 1170, + "time": 13.53 + }, + { + "epoch": 1.12, + "learning_rate": "1.9532e-04", + "loss": 0.9, + "slid_loss": 0.9542, + "step": 1171, + "time": 13.61 + }, + { + "epoch": 1.13, + "learning_rate": "1.9531e-04", + "loss": 0.9576, + "slid_loss": 0.9536, + "step": 1172, + "time": 13.84 + }, + { + "epoch": 1.13, + "learning_rate": "1.9529e-04", + "loss": 0.9676, + "slid_loss": 0.9534, + "step": 1173, + "time": 13.19 + }, + { + "epoch": 1.13, + "learning_rate": "1.9528e-04", + "loss": 0.9994, + "slid_loss": 0.9537, + "step": 1174, + "time": 13.22 + }, + { + "epoch": 1.13, + "learning_rate": "1.9527e-04", + "loss": 1.002, + "slid_loss": 0.9542, + "step": 1175, + "time": 10.91 + }, + { + "epoch": 1.13, + "learning_rate": "1.9525e-04", + "loss": 0.8348, + "slid_loss": 0.9534, + "step": 1176, + "time": 
12.43 + }, + { + "epoch": 1.13, + "learning_rate": "1.9524e-04", + "loss": 0.9222, + "slid_loss": 0.9527, + "step": 1177, + "time": 13.39 + }, + { + "epoch": 1.13, + "learning_rate": "1.9522e-04", + "loss": 0.9225, + "slid_loss": 0.9536, + "step": 1178, + "time": 13.56 + }, + { + "epoch": 1.13, + "learning_rate": "1.9521e-04", + "loss": 0.9172, + "slid_loss": 0.9537, + "step": 1179, + "time": 13.7 + }, + { + "epoch": 1.13, + "learning_rate": "1.9519e-04", + "loss": 1.0012, + "slid_loss": 0.9534, + "step": 1180, + "time": 13.84 + }, + { + "epoch": 1.13, + "learning_rate": "1.9518e-04", + "loss": 0.9266, + "slid_loss": 0.9526, + "step": 1181, + "time": 12.83 + }, + { + "epoch": 1.14, + "learning_rate": "1.9517e-04", + "loss": 0.9005, + "slid_loss": 0.9514, + "step": 1182, + "time": 11.23 + }, + { + "epoch": 1.14, + "learning_rate": "1.9515e-04", + "loss": 1.0599, + "slid_loss": 0.9523, + "step": 1183, + "time": 13.3 + }, + { + "epoch": 1.14, + "learning_rate": "1.9514e-04", + "loss": 0.8789, + "slid_loss": 0.951, + "step": 1184, + "time": 13.23 + }, + { + "epoch": 1.14, + "learning_rate": "1.9512e-04", + "loss": 0.9715, + "slid_loss": 0.9516, + "step": 1185, + "time": 11.03 + }, + { + "epoch": 1.14, + "learning_rate": "1.9511e-04", + "loss": 0.9176, + "slid_loss": 0.9513, + "step": 1186, + "time": 13.68 + }, + { + "epoch": 1.14, + "learning_rate": "1.9509e-04", + "loss": 0.9692, + "slid_loss": 0.9517, + "step": 1187, + "time": 13.73 + }, + { + "epoch": 1.14, + "learning_rate": "1.9508e-04", + "loss": 0.8751, + "slid_loss": 0.9503, + "step": 1188, + "time": 12.8 + }, + { + "epoch": 1.14, + "learning_rate": "1.9507e-04", + "loss": 0.8907, + "slid_loss": 0.9505, + "step": 1189, + "time": 12.04 + }, + { + "epoch": 1.14, + "learning_rate": "1.9505e-04", + "loss": 0.9373, + "slid_loss": 0.9497, + "step": 1190, + "time": 13.78 + }, + { + "epoch": 1.14, + "learning_rate": "1.9504e-04", + "loss": 1.0075, + "slid_loss": 0.9496, + "step": 1191, + "time": 12.29 + }, + { + 
"epoch": 1.15, + "learning_rate": "1.9502e-04", + "loss": 0.9801, + "slid_loss": 0.9501, + "step": 1192, + "time": 13.18 + }, + { + "epoch": 1.15, + "learning_rate": "1.9501e-04", + "loss": 0.9439, + "slid_loss": 0.95, + "step": 1193, + "time": 13.8 + }, + { + "epoch": 1.15, + "learning_rate": "1.9499e-04", + "loss": 0.7925, + "slid_loss": 0.9487, + "step": 1194, + "time": 13.76 + }, + { + "epoch": 1.15, + "learning_rate": "1.9498e-04", + "loss": 0.9536, + "slid_loss": 0.9491, + "step": 1195, + "time": 14.09 + }, + { + "epoch": 1.15, + "learning_rate": "1.9496e-04", + "loss": 0.8415, + "slid_loss": 0.9477, + "step": 1196, + "time": 13.81 + }, + { + "epoch": 1.15, + "learning_rate": "1.9495e-04", + "loss": 1.001, + "slid_loss": 0.9489, + "step": 1197, + "time": 13.26 + }, + { + "epoch": 1.15, + "learning_rate": "1.9493e-04", + "loss": 0.9278, + "slid_loss": 0.9492, + "step": 1198, + "time": 14.02 + }, + { + "epoch": 1.15, + "learning_rate": "1.9492e-04", + "loss": 0.967, + "slid_loss": 0.9504, + "step": 1199, + "time": 13.37 + }, + { + "epoch": 1.15, + "learning_rate": "1.9490e-04", + "loss": 0.8811, + "slid_loss": 0.9505, + "step": 1200, + "time": 13.49 + }, + { + "epoch": 1.15, + "learning_rate": "1.9489e-04", + "loss": 1.0216, + "slid_loss": 0.9518, + "step": 1201, + "time": 13.41 + }, + { + "epoch": 1.15, + "learning_rate": "1.9487e-04", + "loss": 0.9399, + "slid_loss": 0.9522, + "step": 1202, + "time": 13.81 + }, + { + "epoch": 1.16, + "learning_rate": "1.9486e-04", + "loss": 0.9094, + "slid_loss": 0.9525, + "step": 1203, + "time": 13.37 + }, + { + "epoch": 1.16, + "learning_rate": "1.9484e-04", + "loss": 0.9303, + "slid_loss": 0.9522, + "step": 1204, + "time": 13.23 + }, + { + "epoch": 1.16, + "learning_rate": "1.9483e-04", + "loss": 0.9021, + "slid_loss": 0.9508, + "step": 1205, + "time": 13.03 + }, + { + "epoch": 1.16, + "learning_rate": "1.9482e-04", + "loss": 0.9068, + "slid_loss": 0.9495, + "step": 1206, + "time": 13.57 + }, + { + "epoch": 1.16, + 
"learning_rate": "1.9480e-04", + "loss": 0.9538, + "slid_loss": 0.9484, + "step": 1207, + "time": 12.06 + }, + { + "epoch": 1.16, + "learning_rate": "1.9479e-04", + "loss": 1.0151, + "slid_loss": 0.95, + "step": 1208, + "time": 13.38 + }, + { + "epoch": 1.16, + "learning_rate": "1.9477e-04", + "loss": 0.8581, + "slid_loss": 0.9488, + "step": 1209, + "time": 13.9 + }, + { + "epoch": 1.16, + "learning_rate": "1.9476e-04", + "loss": 0.9223, + "slid_loss": 0.9485, + "step": 1210, + "time": 12.19 + }, + { + "epoch": 1.16, + "learning_rate": "1.9474e-04", + "loss": 0.8783, + "slid_loss": 0.9481, + "step": 1211, + "time": 12.29 + }, + { + "epoch": 1.16, + "learning_rate": "1.9473e-04", + "loss": 0.9753, + "slid_loss": 0.9475, + "step": 1212, + "time": 13.4 + }, + { + "epoch": 1.17, + "learning_rate": "1.9471e-04", + "loss": 0.9114, + "slid_loss": 0.9473, + "step": 1213, + "time": 14.06 + }, + { + "epoch": 1.17, + "learning_rate": "1.9470e-04", + "loss": 0.9717, + "slid_loss": 0.9476, + "step": 1214, + "time": 13.87 + }, + { + "epoch": 1.17, + "learning_rate": "1.9468e-04", + "loss": 0.8711, + "slid_loss": 0.9457, + "step": 1215, + "time": 13.3 + }, + { + "epoch": 1.17, + "learning_rate": "1.9467e-04", + "loss": 0.9072, + "slid_loss": 0.9451, + "step": 1216, + "time": 13.39 + }, + { + "epoch": 1.17, + "learning_rate": "1.9465e-04", + "loss": 0.8867, + "slid_loss": 0.9449, + "step": 1217, + "time": 12.99 + }, + { + "epoch": 1.17, + "learning_rate": "1.9464e-04", + "loss": 1.0917, + "slid_loss": 0.9463, + "step": 1218, + "time": 12.91 + }, + { + "epoch": 1.17, + "learning_rate": "1.9462e-04", + "loss": 0.9549, + "slid_loss": 0.9465, + "step": 1219, + "time": 13.33 + }, + { + "epoch": 1.17, + "learning_rate": "1.9461e-04", + "loss": 0.9797, + "slid_loss": 0.9465, + "step": 1220, + "time": 11.99 + }, + { + "epoch": 1.17, + "learning_rate": "1.9459e-04", + "loss": 0.8945, + "slid_loss": 0.9465, + "step": 1221, + "time": 12.6 + }, + { + "epoch": 1.17, + "learning_rate": 
"1.9457e-04", + "loss": 1.0062, + "slid_loss": 0.948, + "step": 1222, + "time": 13.24 + }, + { + "epoch": 1.17, + "learning_rate": "1.9456e-04", + "loss": 0.9828, + "slid_loss": 0.9474, + "step": 1223, + "time": 12.81 + }, + { + "epoch": 1.18, + "learning_rate": "1.9454e-04", + "loss": 0.9117, + "slid_loss": 0.9463, + "step": 1224, + "time": 12.93 + }, + { + "epoch": 1.18, + "learning_rate": "1.9453e-04", + "loss": 0.9372, + "slid_loss": 0.9457, + "step": 1225, + "time": 12.33 + }, + { + "epoch": 1.18, + "learning_rate": "1.9451e-04", + "loss": 0.8593, + "slid_loss": 0.9435, + "step": 1226, + "time": 12.15 + }, + { + "epoch": 1.18, + "learning_rate": "1.9450e-04", + "loss": 0.8272, + "slid_loss": 0.9414, + "step": 1227, + "time": 13.16 + }, + { + "epoch": 1.18, + "learning_rate": "1.9448e-04", + "loss": 0.9126, + "slid_loss": 0.9421, + "step": 1228, + "time": 13.34 + }, + { + "epoch": 1.18, + "learning_rate": "1.9447e-04", + "loss": 0.9673, + "slid_loss": 0.941, + "step": 1229, + "time": 13.63 + }, + { + "epoch": 1.18, + "learning_rate": "1.9445e-04", + "loss": 0.9833, + "slid_loss": 0.941, + "step": 1230, + "time": 12.94 + }, + { + "epoch": 1.18, + "learning_rate": "1.9444e-04", + "loss": 0.91, + "slid_loss": 0.9403, + "step": 1231, + "time": 13.81 + }, + { + "epoch": 1.18, + "learning_rate": "1.9442e-04", + "loss": 0.8445, + "slid_loss": 0.939, + "step": 1232, + "time": 12.5 + }, + { + "epoch": 1.18, + "learning_rate": "1.9441e-04", + "loss": 0.9207, + "slid_loss": 0.9381, + "step": 1233, + "time": 13.67 + }, + { + "epoch": 1.19, + "learning_rate": "1.9439e-04", + "loss": 0.9945, + "slid_loss": 0.9381, + "step": 1234, + "time": 13.47 + }, + { + "epoch": 1.19, + "learning_rate": "1.9438e-04", + "loss": 0.929, + "slid_loss": 0.9375, + "step": 1235, + "time": 13.48 + }, + { + "epoch": 1.19, + "learning_rate": "1.9436e-04", + "loss": 1.0489, + "slid_loss": 0.939, + "step": 1236, + "time": 12.77 + }, + { + "epoch": 1.19, + "learning_rate": "1.9434e-04", + "loss": 
0.8969, + "slid_loss": 0.9376, + "step": 1237, + "time": 13.3 + }, + { + "epoch": 1.19, + "learning_rate": "1.9433e-04", + "loss": 0.8192, + "slid_loss": 0.9358, + "step": 1238, + "time": 12.33 + }, + { + "epoch": 1.19, + "learning_rate": "1.9431e-04", + "loss": 0.9099, + "slid_loss": 0.9359, + "step": 1239, + "time": 13.9 + }, + { + "epoch": 1.19, + "learning_rate": "1.9430e-04", + "loss": 0.8734, + "slid_loss": 0.9355, + "step": 1240, + "time": 13.06 + }, + { + "epoch": 1.19, + "learning_rate": "1.9428e-04", + "loss": 1.0521, + "slid_loss": 0.9368, + "step": 1241, + "time": 13.76 + }, + { + "epoch": 1.19, + "learning_rate": "1.9427e-04", + "loss": 0.9104, + "slid_loss": 0.9366, + "step": 1242, + "time": 12.37 + }, + { + "epoch": 1.19, + "learning_rate": "1.9425e-04", + "loss": 0.9726, + "slid_loss": 0.9369, + "step": 1243, + "time": 12.34 + }, + { + "epoch": 1.2, + "learning_rate": "1.9424e-04", + "loss": 0.9849, + "slid_loss": 0.9371, + "step": 1244, + "time": 13.33 + }, + { + "epoch": 1.2, + "learning_rate": "1.9422e-04", + "loss": 0.969, + "slid_loss": 0.9368, + "step": 1245, + "time": 13.84 + }, + { + "epoch": 1.2, + "learning_rate": "1.9420e-04", + "loss": 0.9812, + "slid_loss": 0.9377, + "step": 1246, + "time": 12.12 + }, + { + "epoch": 1.2, + "learning_rate": "1.9419e-04", + "loss": 0.9618, + "slid_loss": 0.9385, + "step": 1247, + "time": 13.37 + }, + { + "epoch": 1.2, + "learning_rate": "1.9417e-04", + "loss": 1.0197, + "slid_loss": 0.9387, + "step": 1248, + "time": 12.94 + }, + { + "epoch": 1.2, + "learning_rate": "1.9416e-04", + "loss": 0.922, + "slid_loss": 0.9396, + "step": 1249, + "time": 13.67 + }, + { + "epoch": 1.2, + "learning_rate": "1.9414e-04", + "loss": 0.9085, + "slid_loss": 0.938, + "step": 1250, + "time": 13.54 + }, + { + "epoch": 1.2, + "learning_rate": "1.9413e-04", + "loss": 0.9196, + "slid_loss": 0.9383, + "step": 1251, + "time": 12.69 + }, + { + "epoch": 1.2, + "learning_rate": "1.9411e-04", + "loss": 0.9158, + "slid_loss": 0.9382, + 
"step": 1252, + "time": 14.82 + }, + { + "epoch": 1.2, + "learning_rate": "1.9409e-04", + "loss": 0.8973, + "slid_loss": 0.9376, + "step": 1253, + "time": 13.16 + }, + { + "epoch": 1.2, + "learning_rate": "1.9408e-04", + "loss": 1.0365, + "slid_loss": 0.9388, + "step": 1254, + "time": 13.36 + }, + { + "epoch": 1.21, + "learning_rate": "1.9406e-04", + "loss": 0.9074, + "slid_loss": 0.9393, + "step": 1255, + "time": 13.29 + }, + { + "epoch": 1.21, + "learning_rate": "1.9405e-04", + "loss": 0.9828, + "slid_loss": 0.9392, + "step": 1256, + "time": 13.41 + }, + { + "epoch": 1.21, + "learning_rate": "1.9403e-04", + "loss": 0.8463, + "slid_loss": 0.9385, + "step": 1257, + "time": 13.27 + }, + { + "epoch": 1.21, + "learning_rate": "1.9401e-04", + "loss": 0.9472, + "slid_loss": 0.9377, + "step": 1258, + "time": 12.79 + }, + { + "epoch": 1.21, + "learning_rate": "1.9400e-04", + "loss": 0.8637, + "slid_loss": 0.9369, + "step": 1259, + "time": 11.97 + }, + { + "epoch": 1.21, + "learning_rate": "1.9398e-04", + "loss": 0.9316, + "slid_loss": 0.938, + "step": 1260, + "time": 13.49 + }, + { + "epoch": 1.21, + "learning_rate": "1.9397e-04", + "loss": 0.9107, + "slid_loss": 0.9369, + "step": 1261, + "time": 13.72 + }, + { + "epoch": 1.21, + "learning_rate": "1.9395e-04", + "loss": 1.0101, + "slid_loss": 0.9377, + "step": 1262, + "time": 14.05 + }, + { + "epoch": 1.21, + "learning_rate": "1.9393e-04", + "loss": 0.8851, + "slid_loss": 0.9361, + "step": 1263, + "time": 12.93 + }, + { + "epoch": 1.21, + "learning_rate": "1.9392e-04", + "loss": 0.9462, + "slid_loss": 0.9373, + "step": 1264, + "time": 13.76 + }, + { + "epoch": 1.22, + "learning_rate": "1.9390e-04", + "loss": 0.8597, + "slid_loss": 0.9362, + "step": 1265, + "time": 12.92 + }, + { + "epoch": 1.22, + "learning_rate": "1.9389e-04", + "loss": 0.9908, + "slid_loss": 0.9356, + "step": 1266, + "time": 13.31 + }, + { + "epoch": 1.22, + "learning_rate": "1.9387e-04", + "loss": 0.9866, + "slid_loss": 0.9357, + "step": 1267, + 
"time": 12.83 + }, + { + "epoch": 1.22, + "learning_rate": "1.9385e-04", + "loss": 1.0546, + "slid_loss": 0.9379, + "step": 1268, + "time": 13.26 + }, + { + "epoch": 1.22, + "learning_rate": "1.9384e-04", + "loss": 0.9093, + "slid_loss": 0.9375, + "step": 1269, + "time": 13.78 + }, + { + "epoch": 1.22, + "learning_rate": "1.9382e-04", + "loss": 0.927, + "slid_loss": 0.9367, + "step": 1270, + "time": 12.66 + }, + { + "epoch": 1.22, + "learning_rate": "1.9381e-04", + "loss": 0.9112, + "slid_loss": 0.9369, + "step": 1271, + "time": 14.14 + }, + { + "epoch": 1.22, + "learning_rate": "1.9379e-04", + "loss": 0.9583, + "slid_loss": 0.9369, + "step": 1272, + "time": 12.93 + }, + { + "epoch": 1.22, + "learning_rate": "1.9377e-04", + "loss": 0.8529, + "slid_loss": 0.9357, + "step": 1273, + "time": 11.22 + }, + { + "epoch": 1.22, + "learning_rate": "1.9376e-04", + "loss": 0.8961, + "slid_loss": 0.9347, + "step": 1274, + "time": 13.82 + }, + { + "epoch": 1.22, + "learning_rate": "1.9374e-04", + "loss": 0.9146, + "slid_loss": 0.9338, + "step": 1275, + "time": 12.86 + }, + { + "epoch": 1.23, + "learning_rate": "1.9373e-04", + "loss": 0.7996, + "slid_loss": 0.9335, + "step": 1276, + "time": 13.2 + }, + { + "epoch": 1.23, + "learning_rate": "1.9371e-04", + "loss": 0.91, + "slid_loss": 0.9333, + "step": 1277, + "time": 11.2 + }, + { + "epoch": 1.23, + "learning_rate": "1.9369e-04", + "loss": 0.9831, + "slid_loss": 0.9339, + "step": 1278, + "time": 13.9 + }, + { + "epoch": 1.23, + "learning_rate": "1.9368e-04", + "loss": 0.8653, + "slid_loss": 0.9334, + "step": 1279, + "time": 13.81 + }, + { + "epoch": 1.23, + "learning_rate": "1.9366e-04", + "loss": 0.9086, + "slid_loss": 0.9325, + "step": 1280, + "time": 12.87 + }, + { + "epoch": 1.23, + "learning_rate": "1.9364e-04", + "loss": 0.9261, + "slid_loss": 0.9325, + "step": 1281, + "time": 13.7 + }, + { + "epoch": 1.23, + "learning_rate": "1.9363e-04", + "loss": 0.8862, + "slid_loss": 0.9323, + "step": 1282, + "time": 12.51 + }, + { + 
"epoch": 1.23, + "learning_rate": "1.9361e-04", + "loss": 0.9772, + "slid_loss": 0.9315, + "step": 1283, + "time": 12.96 + }, + { + "epoch": 1.23, + "learning_rate": "1.9359e-04", + "loss": 0.9081, + "slid_loss": 0.9318, + "step": 1284, + "time": 13.25 + }, + { + "epoch": 1.23, + "learning_rate": "1.9358e-04", + "loss": 0.8749, + "slid_loss": 0.9308, + "step": 1285, + "time": 14.41 + }, + { + "epoch": 1.24, + "learning_rate": "1.9356e-04", + "loss": 0.8213, + "slid_loss": 0.9299, + "step": 1286, + "time": 13.44 + }, + { + "epoch": 1.24, + "learning_rate": "1.9355e-04", + "loss": 0.9318, + "slid_loss": 0.9295, + "step": 1287, + "time": 11.23 + }, + { + "epoch": 1.24, + "learning_rate": "1.9353e-04", + "loss": 1.0045, + "slid_loss": 0.9308, + "step": 1288, + "time": 13.33 + }, + { + "epoch": 1.24, + "learning_rate": "1.9351e-04", + "loss": 0.9516, + "slid_loss": 0.9314, + "step": 1289, + "time": 14.13 + }, + { + "epoch": 1.24, + "learning_rate": "1.9350e-04", + "loss": 0.9447, + "slid_loss": 0.9315, + "step": 1290, + "time": 13.4 + }, + { + "epoch": 1.24, + "learning_rate": "1.9348e-04", + "loss": 0.9711, + "slid_loss": 0.9311, + "step": 1291, + "time": 11.32 + }, + { + "epoch": 1.24, + "learning_rate": "1.9346e-04", + "loss": 0.9436, + "slid_loss": 0.9308, + "step": 1292, + "time": 13.95 + }, + { + "epoch": 1.24, + "learning_rate": "1.9345e-04", + "loss": 0.9589, + "slid_loss": 0.9309, + "step": 1293, + "time": 13.25 + }, + { + "epoch": 1.24, + "learning_rate": "1.9343e-04", + "loss": 0.8479, + "slid_loss": 0.9315, + "step": 1294, + "time": 12.98 + }, + { + "epoch": 1.24, + "learning_rate": "1.9341e-04", + "loss": 0.9897, + "slid_loss": 0.9318, + "step": 1295, + "time": 11.22 + }, + { + "epoch": 1.24, + "learning_rate": "1.9340e-04", + "loss": 0.9148, + "slid_loss": 0.9326, + "step": 1296, + "time": 11.82 + }, + { + "epoch": 1.25, + "learning_rate": "1.9338e-04", + "loss": 1.0235, + "slid_loss": 0.9328, + "step": 1297, + "time": 13.63 + }, + { + "epoch": 1.25, + 
"learning_rate": "1.9336e-04", + "loss": 0.9835, + "slid_loss": 0.9333, + "step": 1298, + "time": 11.66 + }, + { + "epoch": 1.25, + "learning_rate": "1.9335e-04", + "loss": 0.9062, + "slid_loss": 0.9327, + "step": 1299, + "time": 12.05 + }, + { + "epoch": 1.25, + "learning_rate": "1.9333e-04", + "loss": 0.9723, + "slid_loss": 0.9336, + "step": 1300, + "time": 13.91 + }, + { + "epoch": 1.25, + "learning_rate": "1.9331e-04", + "loss": 0.9532, + "slid_loss": 0.933, + "step": 1301, + "time": 11.9 + }, + { + "epoch": 1.25, + "learning_rate": "1.9330e-04", + "loss": 0.9004, + "slid_loss": 0.9326, + "step": 1302, + "time": 13.8 + }, + { + "epoch": 1.25, + "learning_rate": "1.9328e-04", + "loss": 0.9414, + "slid_loss": 0.9329, + "step": 1303, + "time": 12.35 + }, + { + "epoch": 1.25, + "learning_rate": "1.9326e-04", + "loss": 0.9436, + "slid_loss": 0.933, + "step": 1304, + "time": 13.5 + }, + { + "epoch": 1.25, + "learning_rate": "1.9325e-04", + "loss": 0.8238, + "slid_loss": 0.9322, + "step": 1305, + "time": 11.37 + }, + { + "epoch": 1.25, + "learning_rate": "1.9323e-04", + "loss": 1.0016, + "slid_loss": 0.9332, + "step": 1306, + "time": 12.64 + }, + { + "epoch": 1.26, + "learning_rate": "1.9321e-04", + "loss": 0.9085, + "slid_loss": 0.9327, + "step": 1307, + "time": 13.22 + }, + { + "epoch": 1.26, + "learning_rate": "1.9319e-04", + "loss": 0.9078, + "slid_loss": 0.9317, + "step": 1308, + "time": 13.01 + }, + { + "epoch": 1.26, + "learning_rate": "1.9318e-04", + "loss": 0.9309, + "slid_loss": 0.9324, + "step": 1309, + "time": 13.63 + }, + { + "epoch": 1.26, + "learning_rate": "1.9316e-04", + "loss": 0.891, + "slid_loss": 0.9321, + "step": 1310, + "time": 13.21 + }, + { + "epoch": 1.26, + "learning_rate": "1.9314e-04", + "loss": 0.9598, + "slid_loss": 0.9329, + "step": 1311, + "time": 14.35 + }, + { + "epoch": 1.26, + "learning_rate": "1.9313e-04", + "loss": 0.9336, + "slid_loss": 0.9325, + "step": 1312, + "time": 12.97 + }, + { + "epoch": 1.26, + "learning_rate": 
"1.9311e-04", + "loss": 0.9445, + "slid_loss": 0.9328, + "step": 1313, + "time": 13.43 + }, + { + "epoch": 1.26, + "learning_rate": "1.9309e-04", + "loss": 0.8321, + "slid_loss": 0.9314, + "step": 1314, + "time": 12.81 + }, + { + "epoch": 1.26, + "learning_rate": "1.9308e-04", + "loss": 0.9243, + "slid_loss": 0.9319, + "step": 1315, + "time": 10.95 + }, + { + "epoch": 1.26, + "learning_rate": "1.9306e-04", + "loss": 0.9683, + "slid_loss": 0.9325, + "step": 1316, + "time": 12.56 + }, + { + "epoch": 1.27, + "learning_rate": "1.9304e-04", + "loss": 0.9784, + "slid_loss": 0.9335, + "step": 1317, + "time": 13.67 + }, + { + "epoch": 1.27, + "learning_rate": "1.9302e-04", + "loss": 0.9859, + "slid_loss": 0.9324, + "step": 1318, + "time": 13.12 + }, + { + "epoch": 1.27, + "learning_rate": "1.9301e-04", + "loss": 0.9041, + "slid_loss": 0.9319, + "step": 1319, + "time": 11.59 + }, + { + "epoch": 1.27, + "learning_rate": "1.9299e-04", + "loss": 0.9599, + "slid_loss": 0.9317, + "step": 1320, + "time": 13.67 + }, + { + "epoch": 1.27, + "learning_rate": "1.9297e-04", + "loss": 0.9074, + "slid_loss": 0.9318, + "step": 1321, + "time": 13.88 + }, + { + "epoch": 1.27, + "learning_rate": "1.9296e-04", + "loss": 0.9607, + "slid_loss": 0.9314, + "step": 1322, + "time": 13.21 + }, + { + "epoch": 1.27, + "learning_rate": "1.9294e-04", + "loss": 0.8908, + "slid_loss": 0.9305, + "step": 1323, + "time": 13.28 + }, + { + "epoch": 1.27, + "learning_rate": "1.9292e-04", + "loss": 1.0095, + "slid_loss": 0.9314, + "step": 1324, + "time": 12.29 + }, + { + "epoch": 1.27, + "learning_rate": "1.9290e-04", + "loss": 0.9919, + "slid_loss": 0.932, + "step": 1325, + "time": 12.97 + }, + { + "epoch": 1.27, + "learning_rate": "1.9289e-04", + "loss": 0.9379, + "slid_loss": 0.9328, + "step": 1326, + "time": 12.77 + }, + { + "epoch": 1.27, + "learning_rate": "1.9287e-04", + "loss": 0.919, + "slid_loss": 0.9337, + "step": 1327, + "time": 13.89 + }, + { + "epoch": 1.28, + "learning_rate": "1.9285e-04", + 
"loss": 0.8798, + "slid_loss": 0.9334, + "step": 1328, + "time": 11.31 + }, + { + "epoch": 1.28, + "learning_rate": "1.9284e-04", + "loss": 0.8813, + "slid_loss": 0.9325, + "step": 1329, + "time": 13.22 + }, + { + "epoch": 1.28, + "learning_rate": "1.9282e-04", + "loss": 0.8782, + "slid_loss": 0.9314, + "step": 1330, + "time": 12.82 + }, + { + "epoch": 1.28, + "learning_rate": "1.9280e-04", + "loss": 0.8523, + "slid_loss": 0.9309, + "step": 1331, + "time": 11.85 + }, + { + "epoch": 1.28, + "learning_rate": "1.9278e-04", + "loss": 0.8792, + "slid_loss": 0.9312, + "step": 1332, + "time": 13.49 + }, + { + "epoch": 1.28, + "learning_rate": "1.9277e-04", + "loss": 0.8811, + "slid_loss": 0.9308, + "step": 1333, + "time": 12.92 + }, + { + "epoch": 1.28, + "learning_rate": "1.9275e-04", + "loss": 0.9351, + "slid_loss": 0.9302, + "step": 1334, + "time": 11.72 + }, + { + "epoch": 1.28, + "learning_rate": "1.9273e-04", + "loss": 0.7796, + "slid_loss": 0.9287, + "step": 1335, + "time": 12.32 + }, + { + "epoch": 1.28, + "learning_rate": "1.9271e-04", + "loss": 0.9233, + "slid_loss": 0.9275, + "step": 1336, + "time": 13.73 + }, + { + "epoch": 1.28, + "learning_rate": "1.9270e-04", + "loss": 0.9651, + "slid_loss": 0.9282, + "step": 1337, + "time": 12.86 + }, + { + "epoch": 1.29, + "learning_rate": "1.9268e-04", + "loss": 0.9624, + "slid_loss": 0.9296, + "step": 1338, + "time": 13.34 + }, + { + "epoch": 1.29, + "learning_rate": "1.9266e-04", + "loss": 0.9484, + "slid_loss": 0.93, + "step": 1339, + "time": 12.46 + }, + { + "epoch": 1.29, + "learning_rate": "1.9264e-04", + "loss": 1.039, + "slid_loss": 0.9316, + "step": 1340, + "time": 11.74 + }, + { + "epoch": 1.29, + "learning_rate": "1.9263e-04", + "loss": 1.0187, + "slid_loss": 0.9313, + "step": 1341, + "time": 11.79 + }, + { + "epoch": 1.29, + "learning_rate": "1.9261e-04", + "loss": 0.998, + "slid_loss": 0.9322, + "step": 1342, + "time": 13.88 + }, + { + "epoch": 1.29, + "learning_rate": "1.9259e-04", + "loss": 1.1592, + 
"slid_loss": 0.934, + "step": 1343, + "time": 12.98 + }, + { + "epoch": 1.29, + "learning_rate": "1.9257e-04", + "loss": 0.9156, + "slid_loss": 0.9333, + "step": 1344, + "time": 13.57 + }, + { + "epoch": 1.29, + "learning_rate": "1.9256e-04", + "loss": 0.8495, + "slid_loss": 0.9321, + "step": 1345, + "time": 11.52 + }, + { + "epoch": 1.29, + "learning_rate": "1.9254e-04", + "loss": 0.9682, + "slid_loss": 0.932, + "step": 1346, + "time": 12.93 + }, + { + "epoch": 1.29, + "learning_rate": "1.9252e-04", + "loss": 0.9507, + "slid_loss": 0.9319, + "step": 1347, + "time": 13.44 + }, + { + "epoch": 1.29, + "learning_rate": "1.9250e-04", + "loss": 0.9336, + "slid_loss": 0.931, + "step": 1348, + "time": 13.15 + }, + { + "epoch": 1.3, + "learning_rate": "1.9249e-04", + "loss": 0.8783, + "slid_loss": 0.9306, + "step": 1349, + "time": 13.68 + }, + { + "epoch": 1.3, + "learning_rate": "1.9247e-04", + "loss": 0.8815, + "slid_loss": 0.9303, + "step": 1350, + "time": 13.3 + }, + { + "epoch": 1.3, + "learning_rate": "1.9245e-04", + "loss": 0.9405, + "slid_loss": 0.9305, + "step": 1351, + "time": 13.17 + }, + { + "epoch": 1.3, + "learning_rate": "1.9243e-04", + "loss": 0.9371, + "slid_loss": 0.9308, + "step": 1352, + "time": 14.09 + }, + { + "epoch": 1.3, + "learning_rate": "1.9242e-04", + "loss": 0.8862, + "slid_loss": 0.9307, + "step": 1353, + "time": 12.3 + }, + { + "epoch": 1.3, + "learning_rate": "1.9240e-04", + "loss": 0.9513, + "slid_loss": 0.9298, + "step": 1354, + "time": 13.6 + }, + { + "epoch": 1.3, + "learning_rate": "1.9238e-04", + "loss": 0.9643, + "slid_loss": 0.9304, + "step": 1355, + "time": 12.95 + }, + { + "epoch": 1.3, + "learning_rate": "1.9236e-04", + "loss": 1.0462, + "slid_loss": 0.931, + "step": 1356, + "time": 12.47 + }, + { + "epoch": 1.3, + "learning_rate": "1.9234e-04", + "loss": 0.8614, + "slid_loss": 0.9312, + "step": 1357, + "time": 13.2 + }, + { + "epoch": 1.3, + "learning_rate": "1.9233e-04", + "loss": 0.8833, + "slid_loss": 0.9305, + "step": 1358, 
+ "time": 13.36 + }, + { + "epoch": 1.31, + "learning_rate": "1.9231e-04", + "loss": 0.95, + "slid_loss": 0.9314, + "step": 1359, + "time": 12.95 + }, + { + "epoch": 1.31, + "learning_rate": "1.9229e-04", + "loss": 0.8565, + "slid_loss": 0.9306, + "step": 1360, + "time": 12.25 + }, + { + "epoch": 1.31, + "learning_rate": "1.9227e-04", + "loss": 0.9033, + "slid_loss": 0.9306, + "step": 1361, + "time": 12.2 + }, + { + "epoch": 1.31, + "learning_rate": "1.9225e-04", + "loss": 0.952, + "slid_loss": 0.93, + "step": 1362, + "time": 14.01 + }, + { + "epoch": 1.31, + "learning_rate": "1.9224e-04", + "loss": 0.978, + "slid_loss": 0.9309, + "step": 1363, + "time": 13.17 + }, + { + "epoch": 1.31, + "learning_rate": "1.9222e-04", + "loss": 0.96, + "slid_loss": 0.931, + "step": 1364, + "time": 13.9 + }, + { + "epoch": 1.31, + "learning_rate": "1.9220e-04", + "loss": 0.9698, + "slid_loss": 0.9321, + "step": 1365, + "time": 12.95 + }, + { + "epoch": 1.31, + "learning_rate": "1.9218e-04", + "loss": 0.9172, + "slid_loss": 0.9314, + "step": 1366, + "time": 13.24 + }, + { + "epoch": 1.31, + "learning_rate": "1.9216e-04", + "loss": 0.8797, + "slid_loss": 0.9303, + "step": 1367, + "time": 11.34 + }, + { + "epoch": 1.31, + "learning_rate": "1.9215e-04", + "loss": 0.8177, + "slid_loss": 0.928, + "step": 1368, + "time": 13.48 + }, + { + "epoch": 1.32, + "learning_rate": "1.9213e-04", + "loss": 0.8938, + "slid_loss": 0.9278, + "step": 1369, + "time": 13.58 + }, + { + "epoch": 1.32, + "learning_rate": "1.9211e-04", + "loss": 0.8856, + "slid_loss": 0.9274, + "step": 1370, + "time": 13.41 + }, + { + "epoch": 1.32, + "learning_rate": "1.9209e-04", + "loss": 0.893, + "slid_loss": 0.9272, + "step": 1371, + "time": 12.59 + }, + { + "epoch": 1.32, + "learning_rate": "1.9207e-04", + "loss": 0.891, + "slid_loss": 0.9265, + "step": 1372, + "time": 13.51 + }, + { + "epoch": 1.32, + "learning_rate": "1.9206e-04", + "loss": 0.8974, + "slid_loss": 0.927, + "step": 1373, + "time": 11.92 + }, + { + 
"epoch": 1.32, + "learning_rate": "1.9204e-04", + "loss": 0.7257, + "slid_loss": 0.9253, + "step": 1374, + "time": 13.67 + }, + { + "epoch": 1.32, + "learning_rate": "1.9202e-04", + "loss": 0.9647, + "slid_loss": 0.9258, + "step": 1375, + "time": 13.3 + }, + { + "epoch": 1.32, + "learning_rate": "1.9200e-04", + "loss": 0.9692, + "slid_loss": 0.9275, + "step": 1376, + "time": 13.85 + }, + { + "epoch": 1.32, + "learning_rate": "1.9198e-04", + "loss": 0.905, + "slid_loss": 0.9274, + "step": 1377, + "time": 13.91 + }, + { + "epoch": 1.32, + "learning_rate": "1.9197e-04", + "loss": 0.9273, + "slid_loss": 0.9269, + "step": 1378, + "time": 12.93 + }, + { + "epoch": 1.32, + "learning_rate": "1.9195e-04", + "loss": 0.859, + "slid_loss": 0.9268, + "step": 1379, + "time": 13.49 + }, + { + "epoch": 1.33, + "learning_rate": "1.9193e-04", + "loss": 0.963, + "slid_loss": 0.9274, + "step": 1380, + "time": 13.78 + }, + { + "epoch": 1.33, + "learning_rate": "1.9191e-04", + "loss": 0.8228, + "slid_loss": 0.9263, + "step": 1381, + "time": 13.39 + }, + { + "epoch": 1.33, + "learning_rate": "1.9189e-04", + "loss": 0.8531, + "slid_loss": 0.926, + "step": 1382, + "time": 11.94 + }, + { + "epoch": 1.33, + "learning_rate": "1.9187e-04", + "loss": 1.0377, + "slid_loss": 0.9266, + "step": 1383, + "time": 13.65 + }, + { + "epoch": 1.33, + "learning_rate": "1.9186e-04", + "loss": 0.8803, + "slid_loss": 0.9263, + "step": 1384, + "time": 14.56 + }, + { + "epoch": 1.33, + "learning_rate": "1.9184e-04", + "loss": 0.9668, + "slid_loss": 0.9272, + "step": 1385, + "time": 13.65 + }, + { + "epoch": 1.33, + "learning_rate": "1.9182e-04", + "loss": 0.9109, + "slid_loss": 0.9281, + "step": 1386, + "time": 13.87 + }, + { + "epoch": 1.33, + "learning_rate": "1.9180e-04", + "loss": 0.9611, + "slid_loss": 0.9284, + "step": 1387, + "time": 13.5 + }, + { + "epoch": 1.33, + "learning_rate": "1.9178e-04", + "loss": 0.952, + "slid_loss": 0.9279, + "step": 1388, + "time": 12.96 + }, + { + "epoch": 1.33, + 
"learning_rate": "1.9176e-04", + "loss": 0.9555, + "slid_loss": 0.9279, + "step": 1389, + "time": 13.43 + }, + { + "epoch": 1.34, + "learning_rate": "1.9175e-04", + "loss": 0.9054, + "slid_loss": 0.9275, + "step": 1390, + "time": 13.61 + }, + { + "epoch": 1.34, + "learning_rate": "1.9173e-04", + "loss": 0.8712, + "slid_loss": 0.9265, + "step": 1391, + "time": 13.7 + }, + { + "epoch": 1.34, + "learning_rate": "1.9171e-04", + "loss": 0.8818, + "slid_loss": 0.9259, + "step": 1392, + "time": 14.27 + }, + { + "epoch": 1.34, + "learning_rate": "1.9169e-04", + "loss": 0.9165, + "slid_loss": 0.9255, + "step": 1393, + "time": 13.73 + }, + { + "epoch": 1.34, + "learning_rate": "1.9167e-04", + "loss": 0.8316, + "slid_loss": 0.9253, + "step": 1394, + "time": 13.41 + }, + { + "epoch": 1.34, + "learning_rate": "1.9165e-04", + "loss": 0.9135, + "slid_loss": 0.9246, + "step": 1395, + "time": 13.16 + }, + { + "epoch": 1.34, + "learning_rate": "1.9163e-04", + "loss": 1.0035, + "slid_loss": 0.9255, + "step": 1396, + "time": 13.32 + }, + { + "epoch": 1.34, + "learning_rate": "1.9162e-04", + "loss": 0.8899, + "slid_loss": 0.9241, + "step": 1397, + "time": 13.39 + }, + { + "epoch": 1.34, + "learning_rate": "1.9160e-04", + "loss": 0.8773, + "slid_loss": 0.9231, + "step": 1398, + "time": 13.5 + }, + { + "epoch": 1.34, + "learning_rate": "1.9158e-04", + "loss": 1.0457, + "slid_loss": 0.9245, + "step": 1399, + "time": 12.88 + }, + { + "epoch": 1.34, + "learning_rate": "1.9156e-04", + "loss": 0.8457, + "slid_loss": 0.9232, + "step": 1400, + "time": 13.48 + }, + { + "epoch": 1.35, + "learning_rate": "1.9154e-04", + "loss": 0.9269, + "slid_loss": 0.9229, + "step": 1401, + "time": 12.76 + }, + { + "epoch": 1.35, + "learning_rate": "1.9152e-04", + "loss": 0.9535, + "slid_loss": 0.9235, + "step": 1402, + "time": 13.0 + }, + { + "epoch": 1.35, + "learning_rate": "1.9150e-04", + "loss": 0.8583, + "slid_loss": 0.9226, + "step": 1403, + "time": 11.4 + }, + { + "epoch": 1.35, + "learning_rate": 
"1.9148e-04", + "loss": 0.8369, + "slid_loss": 0.9216, + "step": 1404, + "time": 14.11 + }, + { + "epoch": 1.35, + "learning_rate": "1.9147e-04", + "loss": 0.8748, + "slid_loss": 0.9221, + "step": 1405, + "time": 12.94 + }, + { + "epoch": 1.35, + "learning_rate": "1.9145e-04", + "loss": 0.8523, + "slid_loss": 0.9206, + "step": 1406, + "time": 12.46 + }, + { + "epoch": 1.35, + "learning_rate": "1.9143e-04", + "loss": 0.8802, + "slid_loss": 0.9203, + "step": 1407, + "time": 11.21 + }, + { + "epoch": 1.35, + "learning_rate": "1.9141e-04", + "loss": 0.9475, + "slid_loss": 0.9207, + "step": 1408, + "time": 13.26 + }, + { + "epoch": 1.35, + "learning_rate": "1.9139e-04", + "loss": 0.8981, + "slid_loss": 0.9204, + "step": 1409, + "time": 13.84 + }, + { + "epoch": 1.35, + "learning_rate": "1.9137e-04", + "loss": 0.927, + "slid_loss": 0.9207, + "step": 1410, + "time": 13.46 + }, + { + "epoch": 1.36, + "learning_rate": "1.9135e-04", + "loss": 0.928, + "slid_loss": 0.9204, + "step": 1411, + "time": 13.07 + }, + { + "epoch": 1.36, + "learning_rate": "1.9133e-04", + "loss": 0.8298, + "slid_loss": 0.9194, + "step": 1412, + "time": 13.21 + }, + { + "epoch": 1.36, + "learning_rate": "1.9132e-04", + "loss": 1.035, + "slid_loss": 0.9203, + "step": 1413, + "time": 13.55 + }, + { + "epoch": 1.36, + "learning_rate": "1.9130e-04", + "loss": 0.9949, + "slid_loss": 0.9219, + "step": 1414, + "time": 13.44 + }, + { + "epoch": 1.36, + "learning_rate": "1.9128e-04", + "loss": 0.9171, + "slid_loss": 0.9218, + "step": 1415, + "time": 11.89 + }, + { + "epoch": 1.36, + "learning_rate": "1.9126e-04", + "loss": 0.9529, + "slid_loss": 0.9217, + "step": 1416, + "time": 13.42 + }, + { + "epoch": 1.36, + "learning_rate": "1.9124e-04", + "loss": 0.8169, + "slid_loss": 0.9201, + "step": 1417, + "time": 12.26 + }, + { + "epoch": 1.36, + "learning_rate": "1.9122e-04", + "loss": 0.8369, + "slid_loss": 0.9186, + "step": 1418, + "time": 11.29 + }, + { + "epoch": 1.36, + "learning_rate": "1.9120e-04", + 
"loss": 0.992, + "slid_loss": 0.9195, + "step": 1419, + "time": 11.7 + }, + { + "epoch": 1.36, + "learning_rate": "1.9118e-04", + "loss": 0.9001, + "slid_loss": 0.9189, + "step": 1420, + "time": 11.41 + }, + { + "epoch": 1.37, + "learning_rate": "1.9116e-04", + "loss": 0.9272, + "slid_loss": 0.9191, + "step": 1421, + "time": 12.84 + }, + { + "epoch": 1.37, + "learning_rate": "1.9114e-04", + "loss": 0.9079, + "slid_loss": 0.9185, + "step": 1422, + "time": 14.34 + }, + { + "epoch": 1.37, + "learning_rate": "1.9113e-04", + "loss": 0.8037, + "slid_loss": 0.9177, + "step": 1423, + "time": 12.88 + }, + { + "epoch": 1.37, + "learning_rate": "1.9111e-04", + "loss": 0.9645, + "slid_loss": 0.9172, + "step": 1424, + "time": 12.84 + }, + { + "epoch": 1.37, + "learning_rate": "1.9109e-04", + "loss": 1.004, + "slid_loss": 0.9173, + "step": 1425, + "time": 12.87 + }, + { + "epoch": 1.37, + "learning_rate": "1.9107e-04", + "loss": 0.8612, + "slid_loss": 0.9166, + "step": 1426, + "time": 11.88 + }, + { + "epoch": 1.37, + "learning_rate": "1.9105e-04", + "loss": 0.9573, + "slid_loss": 0.9169, + "step": 1427, + "time": 11.63 + }, + { + "epoch": 1.37, + "learning_rate": "1.9103e-04", + "loss": 0.9261, + "slid_loss": 0.9174, + "step": 1428, + "time": 13.32 + }, + { + "epoch": 1.37, + "learning_rate": "1.9101e-04", + "loss": 0.9471, + "slid_loss": 0.9181, + "step": 1429, + "time": 13.02 + }, + { + "epoch": 1.37, + "learning_rate": "1.9099e-04", + "loss": 0.8639, + "slid_loss": 0.9179, + "step": 1430, + "time": 13.24 + }, + { + "epoch": 1.37, + "learning_rate": "1.9097e-04", + "loss": 0.8136, + "slid_loss": 0.9175, + "step": 1431, + "time": 11.17 + }, + { + "epoch": 1.38, + "learning_rate": "1.9095e-04", + "loss": 0.8149, + "slid_loss": 0.9169, + "step": 1432, + "time": 13.22 + }, + { + "epoch": 1.38, + "learning_rate": "1.9093e-04", + "loss": 1.0393, + "slid_loss": 0.9185, + "step": 1433, + "time": 11.8 + }, + { + "epoch": 1.38, + "learning_rate": "1.9091e-04", + "loss": 0.9198, + 
"slid_loss": 0.9183, + "step": 1434, + "time": 12.72 + }, + { + "epoch": 1.38, + "learning_rate": "1.9090e-04", + "loss": 0.9457, + "slid_loss": 0.92, + "step": 1435, + "time": 11.38 + }, + { + "epoch": 1.38, + "learning_rate": "1.9088e-04", + "loss": 0.8744, + "slid_loss": 0.9195, + "step": 1436, + "time": 12.22 + }, + { + "epoch": 1.38, + "learning_rate": "1.9086e-04", + "loss": 0.8906, + "slid_loss": 0.9188, + "step": 1437, + "time": 13.76 + }, + { + "epoch": 1.38, + "learning_rate": "1.9084e-04", + "loss": 0.8405, + "slid_loss": 0.9175, + "step": 1438, + "time": 14.1 + }, + { + "epoch": 1.38, + "learning_rate": "1.9082e-04", + "loss": 0.8777, + "slid_loss": 0.9168, + "step": 1439, + "time": 11.41 + }, + { + "epoch": 1.38, + "learning_rate": "1.9080e-04", + "loss": 0.8603, + "slid_loss": 0.915, + "step": 1440, + "time": 12.53 + }, + { + "epoch": 1.38, + "learning_rate": "1.9078e-04", + "loss": 0.8282, + "slid_loss": 0.9131, + "step": 1441, + "time": 12.04 + }, + { + "epoch": 1.39, + "learning_rate": "1.9076e-04", + "loss": 0.8724, + "slid_loss": 0.9119, + "step": 1442, + "time": 14.19 + }, + { + "epoch": 1.39, + "learning_rate": "1.9074e-04", + "loss": 0.8638, + "slid_loss": 0.9089, + "step": 1443, + "time": 13.05 + }, + { + "epoch": 1.39, + "learning_rate": "1.9072e-04", + "loss": 0.8757, + "slid_loss": 0.9085, + "step": 1444, + "time": 13.8 + }, + { + "epoch": 1.39, + "learning_rate": "1.9070e-04", + "loss": 0.8016, + "slid_loss": 0.908, + "step": 1445, + "time": 11.84 + }, + { + "epoch": 1.39, + "learning_rate": "1.9068e-04", + "loss": 0.8614, + "slid_loss": 0.907, + "step": 1446, + "time": 13.39 + }, + { + "epoch": 1.39, + "learning_rate": "1.9066e-04", + "loss": 0.8978, + "slid_loss": 0.9064, + "step": 1447, + "time": 13.38 + }, + { + "epoch": 1.39, + "learning_rate": "1.9064e-04", + "loss": 0.8227, + "slid_loss": 0.9053, + "step": 1448, + "time": 12.59 + }, + { + "epoch": 1.39, + "learning_rate": "1.9062e-04", + "loss": 0.9705, + "slid_loss": 0.9063, + 
"step": 1449, + "time": 13.6 + }, + { + "epoch": 1.39, + "learning_rate": "1.9060e-04", + "loss": 0.8924, + "slid_loss": 0.9064, + "step": 1450, + "time": 12.7 + }, + { + "epoch": 1.39, + "learning_rate": "1.9058e-04", + "loss": 0.8413, + "slid_loss": 0.9054, + "step": 1451, + "time": 13.16 + }, + { + "epoch": 1.39, + "learning_rate": "1.9056e-04", + "loss": 0.9253, + "slid_loss": 0.9053, + "step": 1452, + "time": 13.83 + }, + { + "epoch": 1.4, + "learning_rate": "1.9055e-04", + "loss": 0.7543, + "slid_loss": 0.9039, + "step": 1453, + "time": 12.87 + }, + { + "epoch": 1.4, + "learning_rate": "1.9053e-04", + "loss": 0.8943, + "slid_loss": 0.9034, + "step": 1454, + "time": 13.03 + }, + { + "epoch": 1.4, + "learning_rate": "1.9051e-04", + "loss": 0.9155, + "slid_loss": 0.9029, + "step": 1455, + "time": 12.89 + }, + { + "epoch": 1.4, + "learning_rate": "1.9049e-04", + "loss": 0.954, + "slid_loss": 0.902, + "step": 1456, + "time": 13.39 + }, + { + "epoch": 1.4, + "learning_rate": "1.9047e-04", + "loss": 0.8764, + "slid_loss": 0.9021, + "step": 1457, + "time": 12.04 + }, + { + "epoch": 1.4, + "learning_rate": "1.9045e-04", + "loss": 0.9024, + "slid_loss": 0.9023, + "step": 1458, + "time": 13.6 + }, + { + "epoch": 1.4, + "learning_rate": "1.9043e-04", + "loss": 0.9397, + "slid_loss": 0.9022, + "step": 1459, + "time": 13.3 + }, + { + "epoch": 1.4, + "learning_rate": "1.9041e-04", + "loss": 0.807, + "slid_loss": 0.9017, + "step": 1460, + "time": 12.98 + }, + { + "epoch": 1.4, + "learning_rate": "1.9039e-04", + "loss": 0.8565, + "slid_loss": 0.9012, + "step": 1461, + "time": 13.68 + }, + { + "epoch": 1.4, + "learning_rate": "1.9037e-04", + "loss": 0.9582, + "slid_loss": 0.9013, + "step": 1462, + "time": 13.57 + }, + { + "epoch": 1.41, + "learning_rate": "1.9035e-04", + "loss": 0.8449, + "slid_loss": 0.9, + "step": 1463, + "time": 13.23 + }, + { + "epoch": 1.41, + "learning_rate": "1.9033e-04", + "loss": 0.8039, + "slid_loss": 0.8984, + "step": 1464, + "time": 13.14 + }, + { 
+ "epoch": 1.41, + "learning_rate": "1.9031e-04", + "loss": 0.8375, + "slid_loss": 0.8971, + "step": 1465, + "time": 11.24 + }, + { + "epoch": 1.41, + "learning_rate": "1.9029e-04", + "loss": 0.8942, + "slid_loss": 0.8969, + "step": 1466, + "time": 13.91 + }, + { + "epoch": 1.41, + "learning_rate": "1.9027e-04", + "loss": 0.8093, + "slid_loss": 0.8961, + "step": 1467, + "time": 12.82 + }, + { + "epoch": 1.41, + "learning_rate": "1.9025e-04", + "loss": 0.9197, + "slid_loss": 0.8972, + "step": 1468, + "time": 11.49 + }, + { + "epoch": 1.41, + "learning_rate": "1.9023e-04", + "loss": 0.8309, + "slid_loss": 0.8965, + "step": 1469, + "time": 13.62 + }, + { + "epoch": 1.41, + "learning_rate": "1.9021e-04", + "loss": 0.7911, + "slid_loss": 0.8956, + "step": 1470, + "time": 11.29 + }, + { + "epoch": 1.41, + "learning_rate": "1.9019e-04", + "loss": 0.9153, + "slid_loss": 0.8958, + "step": 1471, + "time": 13.95 + }, + { + "epoch": 1.41, + "learning_rate": "1.9017e-04", + "loss": 0.8722, + "slid_loss": 0.8956, + "step": 1472, + "time": 14.05 + }, + { + "epoch": 1.41, + "learning_rate": "1.9015e-04", + "loss": 0.8862, + "slid_loss": 0.8955, + "step": 1473, + "time": 11.82 + }, + { + "epoch": 1.42, + "learning_rate": "1.9013e-04", + "loss": 0.8181, + "slid_loss": 0.8964, + "step": 1474, + "time": 13.67 + }, + { + "epoch": 1.42, + "learning_rate": "1.9011e-04", + "loss": 0.8973, + "slid_loss": 0.8958, + "step": 1475, + "time": 13.24 + }, + { + "epoch": 1.42, + "learning_rate": "1.9009e-04", + "loss": 0.9214, + "slid_loss": 0.8953, + "step": 1476, + "time": 13.36 + }, + { + "epoch": 1.42, + "learning_rate": "1.9007e-04", + "loss": 0.9046, + "slid_loss": 0.8953, + "step": 1477, + "time": 11.75 + }, + { + "epoch": 1.42, + "learning_rate": "1.9005e-04", + "loss": 0.839, + "slid_loss": 0.8944, + "step": 1478, + "time": 13.3 + }, + { + "epoch": 1.42, + "learning_rate": "1.9003e-04", + "loss": 0.8992, + "slid_loss": 0.8948, + "step": 1479, + "time": 13.71 + }, + { + "epoch": 1.42, + 
"learning_rate": "1.9001e-04", + "loss": 0.9709, + "slid_loss": 0.8949, + "step": 1480, + "time": 13.57 + }, + { + "epoch": 1.42, + "learning_rate": "1.8999e-04", + "loss": 0.9609, + "slid_loss": 0.8963, + "step": 1481, + "time": 11.21 + }, + { + "epoch": 1.42, + "learning_rate": "1.8997e-04", + "loss": 0.834, + "slid_loss": 0.8961, + "step": 1482, + "time": 13.02 + }, + { + "epoch": 1.42, + "learning_rate": "1.8995e-04", + "loss": 0.8869, + "slid_loss": 0.8946, + "step": 1483, + "time": 12.7 + }, + { + "epoch": 1.43, + "learning_rate": "1.8993e-04", + "loss": 0.8648, + "slid_loss": 0.8944, + "step": 1484, + "time": 12.16 + }, + { + "epoch": 1.43, + "learning_rate": "1.8991e-04", + "loss": 0.911, + "slid_loss": 0.8939, + "step": 1485, + "time": 13.46 + }, + { + "epoch": 1.43, + "learning_rate": "1.8989e-04", + "loss": 0.9668, + "slid_loss": 0.8944, + "step": 1486, + "time": 11.48 + }, + { + "epoch": 1.43, + "learning_rate": "1.8987e-04", + "loss": 0.8711, + "slid_loss": 0.8935, + "step": 1487, + "time": 13.72 + }, + { + "epoch": 1.43, + "learning_rate": "1.8985e-04", + "loss": 0.9204, + "slid_loss": 0.8932, + "step": 1488, + "time": 13.33 + }, + { + "epoch": 1.43, + "learning_rate": "1.8983e-04", + "loss": 0.9283, + "slid_loss": 0.8929, + "step": 1489, + "time": 13.24 + }, + { + "epoch": 1.43, + "learning_rate": "1.8981e-04", + "loss": 0.8535, + "slid_loss": 0.8924, + "step": 1490, + "time": 13.82 + }, + { + "epoch": 1.43, + "learning_rate": "1.8979e-04", + "loss": 0.8063, + "slid_loss": 0.8918, + "step": 1491, + "time": 12.81 + }, + { + "epoch": 1.43, + "learning_rate": "1.8977e-04", + "loss": 0.8354, + "slid_loss": 0.8913, + "step": 1492, + "time": 13.3 + }, + { + "epoch": 1.43, + "learning_rate": "1.8975e-04", + "loss": 0.8483, + "slid_loss": 0.8906, + "step": 1493, + "time": 13.87 + }, + { + "epoch": 1.44, + "learning_rate": "1.8973e-04", + "loss": 0.9289, + "slid_loss": 0.8916, + "step": 1494, + "time": 13.29 + }, + { + "epoch": 1.44, + "learning_rate": 
"1.8971e-04", + "loss": 0.8886, + "slid_loss": 0.8913, + "step": 1495, + "time": 14.17 + }, + { + "epoch": 1.44, + "learning_rate": "1.8968e-04", + "loss": 0.9355, + "slid_loss": 0.8907, + "step": 1496, + "time": 14.97 + }, + { + "epoch": 1.44, + "learning_rate": "1.8966e-04", + "loss": 0.8532, + "slid_loss": 0.8903, + "step": 1497, + "time": 11.54 + }, + { + "epoch": 1.44, + "learning_rate": "1.8964e-04", + "loss": 0.8421, + "slid_loss": 0.8899, + "step": 1498, + "time": 13.54 + }, + { + "epoch": 1.44, + "learning_rate": "1.8962e-04", + "loss": 0.8613, + "slid_loss": 0.8881, + "step": 1499, + "time": 14.04 + }, + { + "epoch": 1.44, + "learning_rate": "1.8960e-04", + "loss": 0.7498, + "slid_loss": 0.8871, + "step": 1500, + "time": 11.3 + }, + { + "epoch": 1.44, + "learning_rate": "1.8958e-04", + "loss": 0.8904, + "slid_loss": 0.8868, + "step": 1501, + "time": 12.85 + }, + { + "epoch": 1.44, + "learning_rate": "1.8956e-04", + "loss": 0.885, + "slid_loss": 0.8861, + "step": 1502, + "time": 13.59 + }, + { + "epoch": 1.44, + "learning_rate": "1.8954e-04", + "loss": 0.7351, + "slid_loss": 0.8848, + "step": 1503, + "time": 12.56 + }, + { + "epoch": 1.44, + "learning_rate": "1.8952e-04", + "loss": 0.8382, + "slid_loss": 0.8849, + "step": 1504, + "time": 13.68 + }, + { + "epoch": 1.45, + "learning_rate": "1.8950e-04", + "loss": 0.9013, + "slid_loss": 0.8851, + "step": 1505, + "time": 13.07 + }, + { + "epoch": 1.45, + "learning_rate": "1.8948e-04", + "loss": 0.8669, + "slid_loss": 0.8853, + "step": 1506, + "time": 14.11 + }, + { + "epoch": 1.45, + "learning_rate": "1.8946e-04", + "loss": 0.9332, + "slid_loss": 0.8858, + "step": 1507, + "time": 14.78 + }, + { + "epoch": 1.45, + "learning_rate": "1.8944e-04", + "loss": 0.8608, + "slid_loss": 0.8849, + "step": 1508, + "time": 12.94 + }, + { + "epoch": 1.45, + "learning_rate": "1.8942e-04", + "loss": 0.9714, + "slid_loss": 0.8857, + "step": 1509, + "time": 11.57 + }, + { + "epoch": 1.45, + "learning_rate": "1.8940e-04", + 
"loss": 0.9011, + "slid_loss": 0.8854, + "step": 1510, + "time": 11.3 + }, + { + "epoch": 1.45, + "learning_rate": "1.8938e-04", + "loss": 0.855, + "slid_loss": 0.8847, + "step": 1511, + "time": 13.36 + }, + { + "epoch": 1.45, + "learning_rate": "1.8936e-04", + "loss": 0.9487, + "slid_loss": 0.8859, + "step": 1512, + "time": 13.48 + }, + { + "epoch": 1.45, + "learning_rate": "1.8934e-04", + "loss": 0.8822, + "slid_loss": 0.8843, + "step": 1513, + "time": 14.25 + }, + { + "epoch": 1.45, + "learning_rate": "1.8931e-04", + "loss": 0.7983, + "slid_loss": 0.8824, + "step": 1514, + "time": 13.96 + }, + { + "epoch": 1.46, + "learning_rate": "1.8929e-04", + "loss": 1.0528, + "slid_loss": 0.8837, + "step": 1515, + "time": 11.67 + }, + { + "epoch": 1.46, + "learning_rate": "1.8927e-04", + "loss": 0.8597, + "slid_loss": 0.8828, + "step": 1516, + "time": 14.21 + }, + { + "epoch": 1.46, + "learning_rate": "1.8925e-04", + "loss": 0.9117, + "slid_loss": 0.8838, + "step": 1517, + "time": 12.88 + }, + { + "epoch": 1.46, + "learning_rate": "1.8923e-04", + "loss": 0.914, + "slid_loss": 0.8845, + "step": 1518, + "time": 13.08 + }, + { + "epoch": 1.46, + "learning_rate": "1.8921e-04", + "loss": 0.9285, + "slid_loss": 0.8839, + "step": 1519, + "time": 14.04 + }, + { + "epoch": 1.46, + "learning_rate": "1.8919e-04", + "loss": 0.8524, + "slid_loss": 0.8834, + "step": 1520, + "time": 13.89 + }, + { + "epoch": 1.46, + "learning_rate": "1.8917e-04", + "loss": 0.8491, + "slid_loss": 0.8826, + "step": 1521, + "time": 13.37 + }, + { + "epoch": 1.46, + "learning_rate": "1.8915e-04", + "loss": 0.8738, + "slid_loss": 0.8823, + "step": 1522, + "time": 12.14 + }, + { + "epoch": 1.46, + "learning_rate": "1.8913e-04", + "loss": 0.7718, + "slid_loss": 0.882, + "step": 1523, + "time": 12.83 + }, + { + "epoch": 1.46, + "learning_rate": "1.8911e-04", + "loss": 0.8818, + "slid_loss": 0.8811, + "step": 1524, + "time": 13.23 + }, + { + "epoch": 1.46, + "learning_rate": "1.8909e-04", + "loss": 0.9648, + 
"slid_loss": 0.8807, + "step": 1525, + "time": 13.87 + }, + { + "epoch": 1.47, + "learning_rate": "1.8906e-04", + "loss": 0.9511, + "slid_loss": 0.8816, + "step": 1526, + "time": 13.31 + }, + { + "epoch": 1.47, + "learning_rate": "1.8904e-04", + "loss": 0.9515, + "slid_loss": 0.8816, + "step": 1527, + "time": 13.25 + }, + { + "epoch": 1.47, + "learning_rate": "1.8902e-04", + "loss": 0.8144, + "slid_loss": 0.8805, + "step": 1528, + "time": 11.91 + }, + { + "epoch": 1.47, + "learning_rate": "1.8900e-04", + "loss": 0.8155, + "slid_loss": 0.8792, + "step": 1529, + "time": 13.78 + }, + { + "epoch": 1.47, + "learning_rate": "1.8898e-04", + "loss": 0.9527, + "slid_loss": 0.88, + "step": 1530, + "time": 12.25 + }, + { + "epoch": 1.47, + "learning_rate": "1.8896e-04", + "loss": 0.8869, + "slid_loss": 0.8808, + "step": 1531, + "time": 12.95 + }, + { + "epoch": 1.47, + "learning_rate": "1.8894e-04", + "loss": 0.9171, + "slid_loss": 0.8818, + "step": 1532, + "time": 13.53 + }, + { + "epoch": 1.47, + "learning_rate": "1.8892e-04", + "loss": 0.8461, + "slid_loss": 0.8799, + "step": 1533, + "time": 13.2 + }, + { + "epoch": 1.47, + "learning_rate": "1.8890e-04", + "loss": 0.9913, + "slid_loss": 0.8806, + "step": 1534, + "time": 13.25 + }, + { + "epoch": 1.47, + "learning_rate": "1.8888e-04", + "loss": 0.8733, + "slid_loss": 0.8799, + "step": 1535, + "time": 13.92 + }, + { + "epoch": 1.48, + "learning_rate": "1.8885e-04", + "loss": 0.8552, + "slid_loss": 0.8797, + "step": 1536, + "time": 13.56 + }, + { + "epoch": 1.48, + "learning_rate": "1.8883e-04", + "loss": 0.9202, + "slid_loss": 0.88, + "step": 1537, + "time": 12.99 + }, + { + "epoch": 1.48, + "learning_rate": "1.8881e-04", + "loss": 0.912, + "slid_loss": 0.8807, + "step": 1538, + "time": 12.22 + }, + { + "epoch": 1.48, + "learning_rate": "1.8879e-04", + "loss": 0.8168, + "slid_loss": 0.8801, + "step": 1539, + "time": 12.98 + }, + { + "epoch": 1.48, + "learning_rate": "1.8877e-04", + "loss": 1.0205, + "slid_loss": 0.8817, + 
"step": 1540, + "time": 13.17 + }, + { + "epoch": 1.48, + "learning_rate": "1.8875e-04", + "loss": 1.0057, + "slid_loss": 0.8834, + "step": 1541, + "time": 12.94 + }, + { + "epoch": 1.48, + "learning_rate": "1.8873e-04", + "loss": 0.8408, + "slid_loss": 0.8831, + "step": 1542, + "time": 14.32 + }, + { + "epoch": 1.48, + "learning_rate": "1.8871e-04", + "loss": 0.8673, + "slid_loss": 0.8832, + "step": 1543, + "time": 11.5 + }, + { + "epoch": 1.48, + "learning_rate": "1.8869e-04", + "loss": 0.861, + "slid_loss": 0.883, + "step": 1544, + "time": 13.27 + }, + { + "epoch": 1.48, + "learning_rate": "1.8866e-04", + "loss": 0.9508, + "slid_loss": 0.8845, + "step": 1545, + "time": 12.24 + }, + { + "epoch": 1.49, + "learning_rate": "1.8864e-04", + "loss": 0.8187, + "slid_loss": 0.8841, + "step": 1546, + "time": 12.26 + }, + { + "epoch": 1.49, + "learning_rate": "1.8862e-04", + "loss": 0.8771, + "slid_loss": 0.8839, + "step": 1547, + "time": 13.41 + }, + { + "epoch": 1.49, + "learning_rate": "1.8860e-04", + "loss": 0.8355, + "slid_loss": 0.884, + "step": 1548, + "time": 13.2 + }, + { + "epoch": 1.49, + "learning_rate": "1.8858e-04", + "loss": 0.8385, + "slid_loss": 0.8827, + "step": 1549, + "time": 14.0 + }, + { + "epoch": 1.49, + "learning_rate": "1.8856e-04", + "loss": 0.8897, + "slid_loss": 0.8827, + "step": 1550, + "time": 14.35 + }, + { + "epoch": 1.49, + "learning_rate": "1.8854e-04", + "loss": 0.8735, + "slid_loss": 0.883, + "step": 1551, + "time": 12.77 + }, + { + "epoch": 1.49, + "learning_rate": "1.8851e-04", + "loss": 0.8096, + "slid_loss": 0.8818, + "step": 1552, + "time": 12.4 + }, + { + "epoch": 1.49, + "learning_rate": "1.8849e-04", + "loss": 0.8893, + "slid_loss": 0.8832, + "step": 1553, + "time": 14.18 + }, + { + "epoch": 1.49, + "learning_rate": "1.8847e-04", + "loss": 0.8604, + "slid_loss": 0.8828, + "step": 1554, + "time": 13.67 + }, + { + "epoch": 1.49, + "learning_rate": "1.8845e-04", + "loss": 0.8769, + "slid_loss": 0.8824, + "step": 1555, + "time": 
13.25 + }, + { + "epoch": 1.49, + "learning_rate": "1.8843e-04", + "loss": 0.9607, + "slid_loss": 0.8825, + "step": 1556, + "time": 13.65 + }, + { + "epoch": 1.5, + "learning_rate": "1.8841e-04", + "loss": 0.9653, + "slid_loss": 0.8834, + "step": 1557, + "time": 13.53 + }, + { + "epoch": 1.5, + "learning_rate": "1.8839e-04", + "loss": 0.7939, + "slid_loss": 0.8823, + "step": 1558, + "time": 12.87 + }, + { + "epoch": 1.5, + "learning_rate": "1.8836e-04", + "loss": 0.7867, + "slid_loss": 0.8808, + "step": 1559, + "time": 13.52 + }, + { + "epoch": 1.5, + "learning_rate": "1.8834e-04", + "loss": 0.8356, + "slid_loss": 0.8811, + "step": 1560, + "time": 14.4 + }, + { + "epoch": 1.5, + "learning_rate": "1.8832e-04", + "loss": 0.8589, + "slid_loss": 0.8811, + "step": 1561, + "time": 13.99 + }, + { + "epoch": 1.5, + "learning_rate": "1.8830e-04", + "loss": 0.8716, + "slid_loss": 0.8802, + "step": 1562, + "time": 13.82 + }, + { + "epoch": 1.5, + "learning_rate": "1.8828e-04", + "loss": 0.9385, + "slid_loss": 0.8812, + "step": 1563, + "time": 12.86 + }, + { + "epoch": 1.5, + "learning_rate": "1.8826e-04", + "loss": 0.8067, + "slid_loss": 0.8812, + "step": 1564, + "time": 13.06 + }, + { + "epoch": 1.5, + "learning_rate": "1.8824e-04", + "loss": 0.8867, + "slid_loss": 0.8817, + "step": 1565, + "time": 14.02 + }, + { + "epoch": 1.5, + "learning_rate": "1.8821e-04", + "loss": 0.9579, + "slid_loss": 0.8823, + "step": 1566, + "time": 11.32 + }, + { + "epoch": 1.51, + "learning_rate": "1.8819e-04", + "loss": 0.8706, + "slid_loss": 0.8829, + "step": 1567, + "time": 13.46 + }, + { + "epoch": 1.51, + "learning_rate": "1.8817e-04", + "loss": 0.924, + "slid_loss": 0.883, + "step": 1568, + "time": 13.54 + }, + { + "epoch": 1.51, + "learning_rate": "1.8815e-04", + "loss": 0.9206, + "slid_loss": 0.8839, + "step": 1569, + "time": 11.96 + }, + { + "epoch": 1.51, + "learning_rate": "1.8813e-04", + "loss": 0.8313, + "slid_loss": 0.8843, + "step": 1570, + "time": 13.49 + }, + { + "epoch": 1.51, 
+ "learning_rate": "1.8811e-04", + "loss": 0.9262, + "slid_loss": 0.8844, + "step": 1571, + "time": 13.53 + }, + { + "epoch": 1.51, + "learning_rate": "1.8808e-04", + "loss": 0.8273, + "slid_loss": 0.8839, + "step": 1572, + "time": 14.18 + }, + { + "epoch": 1.51, + "learning_rate": "1.8806e-04", + "loss": 0.8284, + "slid_loss": 0.8834, + "step": 1573, + "time": 13.72 + }, + { + "epoch": 1.51, + "learning_rate": "1.8804e-04", + "loss": 0.8375, + "slid_loss": 0.8835, + "step": 1574, + "time": 13.52 + }, + { + "epoch": 1.51, + "learning_rate": "1.8802e-04", + "loss": 0.9035, + "slid_loss": 0.8836, + "step": 1575, + "time": 13.32 + }, + { + "epoch": 1.51, + "learning_rate": "1.8800e-04", + "loss": 0.9119, + "slid_loss": 0.8835, + "step": 1576, + "time": 13.91 + }, + { + "epoch": 1.51, + "learning_rate": "1.8797e-04", + "loss": 0.8213, + "slid_loss": 0.8827, + "step": 1577, + "time": 13.39 + }, + { + "epoch": 1.52, + "learning_rate": "1.8795e-04", + "loss": 0.9077, + "slid_loss": 0.8834, + "step": 1578, + "time": 12.9 + }, + { + "epoch": 1.52, + "learning_rate": "1.8793e-04", + "loss": 0.8485, + "slid_loss": 0.8829, + "step": 1579, + "time": 13.3 + }, + { + "epoch": 1.52, + "learning_rate": "1.8791e-04", + "loss": 0.8139, + "slid_loss": 0.8813, + "step": 1580, + "time": 12.34 + }, + { + "epoch": 1.52, + "learning_rate": "1.8789e-04", + "loss": 0.922, + "slid_loss": 0.8809, + "step": 1581, + "time": 14.02 + }, + { + "epoch": 1.52, + "learning_rate": "1.8787e-04", + "loss": 0.9958, + "slid_loss": 0.8825, + "step": 1582, + "time": 13.22 + }, + { + "epoch": 1.52, + "learning_rate": "1.8784e-04", + "loss": 0.938, + "slid_loss": 0.883, + "step": 1583, + "time": 13.64 + }, + { + "epoch": 1.52, + "learning_rate": "1.8782e-04", + "loss": 0.8719, + "slid_loss": 0.8831, + "step": 1584, + "time": 13.45 + }, + { + "epoch": 1.52, + "learning_rate": "1.8780e-04", + "loss": 0.803, + "slid_loss": 0.882, + "step": 1585, + "time": 14.42 + }, + { + "epoch": 1.52, + "learning_rate": 
"1.8778e-04", + "loss": 0.8928, + "slid_loss": 0.8813, + "step": 1586, + "time": 13.43 + }, + { + "epoch": 1.52, + "learning_rate": "1.8776e-04", + "loss": 0.808, + "slid_loss": 0.8807, + "step": 1587, + "time": 11.51 + }, + { + "epoch": 1.53, + "learning_rate": "1.8773e-04", + "loss": 0.9355, + "slid_loss": 0.8808, + "step": 1588, + "time": 11.51 + }, + { + "epoch": 1.53, + "learning_rate": "1.8771e-04", + "loss": 0.9875, + "slid_loss": 0.8814, + "step": 1589, + "time": 13.1 + }, + { + "epoch": 1.53, + "learning_rate": "1.8769e-04", + "loss": 0.8737, + "slid_loss": 0.8816, + "step": 1590, + "time": 11.82 + }, + { + "epoch": 1.53, + "learning_rate": "1.8767e-04", + "loss": 0.9711, + "slid_loss": 0.8832, + "step": 1591, + "time": 13.29 + }, + { + "epoch": 1.53, + "learning_rate": "1.8765e-04", + "loss": 0.8412, + "slid_loss": 0.8833, + "step": 1592, + "time": 11.37 + }, + { + "epoch": 1.53, + "learning_rate": "1.8762e-04", + "loss": 0.8723, + "slid_loss": 0.8835, + "step": 1593, + "time": 13.39 + }, + { + "epoch": 1.53, + "learning_rate": "1.8760e-04", + "loss": 0.8755, + "slid_loss": 0.883, + "step": 1594, + "time": 13.27 + }, + { + "epoch": 1.53, + "learning_rate": "1.8758e-04", + "loss": 0.9335, + "slid_loss": 0.8835, + "step": 1595, + "time": 13.18 + }, + { + "epoch": 1.53, + "learning_rate": "1.8756e-04", + "loss": 0.7671, + "slid_loss": 0.8818, + "step": 1596, + "time": 13.37 + }, + { + "epoch": 1.53, + "learning_rate": "1.8753e-04", + "loss": 0.8048, + "slid_loss": 0.8813, + "step": 1597, + "time": 13.66 + }, + { + "epoch": 1.54, + "learning_rate": "1.8751e-04", + "loss": 0.8984, + "slid_loss": 0.8819, + "step": 1598, + "time": 12.19 + }, + { + "epoch": 1.54, + "learning_rate": "1.8749e-04", + "loss": 0.8758, + "slid_loss": 0.882, + "step": 1599, + "time": 12.15 + }, + { + "epoch": 1.54, + "learning_rate": "1.8747e-04", + "loss": 0.9149, + "slid_loss": 0.8837, + "step": 1600, + "time": 11.71 + }, + { + "epoch": 1.54, + "learning_rate": "1.8745e-04", + "loss": 
0.9414, + "slid_loss": 0.8842, + "step": 1601, + "time": 13.73 + }, + { + "epoch": 1.54, + "learning_rate": "1.8742e-04", + "loss": 0.8856, + "slid_loss": 0.8842, + "step": 1602, + "time": 13.4 + }, + { + "epoch": 1.54, + "learning_rate": "1.8740e-04", + "loss": 1.0178, + "slid_loss": 0.887, + "step": 1603, + "time": 13.68 + }, + { + "epoch": 1.54, + "learning_rate": "1.8738e-04", + "loss": 0.8888, + "slid_loss": 0.8875, + "step": 1604, + "time": 12.96 + }, + { + "epoch": 1.54, + "learning_rate": "1.8736e-04", + "loss": 0.9464, + "slid_loss": 0.888, + "step": 1605, + "time": 13.52 + }, + { + "epoch": 1.54, + "learning_rate": "1.8733e-04", + "loss": 0.8917, + "slid_loss": 0.8882, + "step": 1606, + "time": 11.27 + }, + { + "epoch": 1.54, + "learning_rate": "1.8731e-04", + "loss": 0.857, + "slid_loss": 0.8874, + "step": 1607, + "time": 12.94 + }, + { + "epoch": 1.54, + "learning_rate": "1.8729e-04", + "loss": 0.9577, + "slid_loss": 0.8884, + "step": 1608, + "time": 12.86 + }, + { + "epoch": 1.55, + "learning_rate": "1.8727e-04", + "loss": 0.897, + "slid_loss": 0.8877, + "step": 1609, + "time": 13.85 + }, + { + "epoch": 1.55, + "learning_rate": "1.8725e-04", + "loss": 0.8651, + "slid_loss": 0.8873, + "step": 1610, + "time": 13.66 + }, + { + "epoch": 1.55, + "learning_rate": "1.8722e-04", + "loss": 1.0093, + "slid_loss": 0.8888, + "step": 1611, + "time": 13.41 + }, + { + "epoch": 1.55, + "learning_rate": "1.8720e-04", + "loss": 0.8059, + "slid_loss": 0.8874, + "step": 1612, + "time": 13.64 + }, + { + "epoch": 1.55, + "learning_rate": "1.8718e-04", + "loss": 0.9999, + "slid_loss": 0.8886, + "step": 1613, + "time": 13.33 + }, + { + "epoch": 1.55, + "learning_rate": "1.8716e-04", + "loss": 0.8195, + "slid_loss": 0.8888, + "step": 1614, + "time": 11.33 + }, + { + "epoch": 1.55, + "learning_rate": "1.8713e-04", + "loss": 0.9, + "slid_loss": 0.8873, + "step": 1615, + "time": 11.54 + }, + { + "epoch": 1.55, + "learning_rate": "1.8711e-04", + "loss": 0.7715, + "slid_loss": 
0.8864, + "step": 1616, + "time": 14.44 + }, + { + "epoch": 1.55, + "learning_rate": "1.8709e-04", + "loss": 0.9013, + "slid_loss": 0.8863, + "step": 1617, + "time": 13.32 + }, + { + "epoch": 1.55, + "learning_rate": "1.8707e-04", + "loss": 0.8062, + "slid_loss": 0.8852, + "step": 1618, + "time": 13.25 + }, + { + "epoch": 1.56, + "learning_rate": "1.8704e-04", + "loss": 0.8497, + "slid_loss": 0.8844, + "step": 1619, + "time": 13.54 + }, + { + "epoch": 1.56, + "learning_rate": "1.8702e-04", + "loss": 0.7631, + "slid_loss": 0.8835, + "step": 1620, + "time": 13.67 + }, + { + "epoch": 1.56, + "learning_rate": "1.8700e-04", + "loss": 0.8759, + "slid_loss": 0.8838, + "step": 1621, + "time": 13.6 + }, + { + "epoch": 1.56, + "learning_rate": "1.8698e-04", + "loss": 0.9415, + "slid_loss": 0.8845, + "step": 1622, + "time": 11.55 + }, + { + "epoch": 1.56, + "learning_rate": "1.8695e-04", + "loss": 0.7913, + "slid_loss": 0.8847, + "step": 1623, + "time": 11.95 + }, + { + "epoch": 1.56, + "learning_rate": "1.8693e-04", + "loss": 0.8145, + "slid_loss": 0.884, + "step": 1624, + "time": 11.43 + }, + { + "epoch": 1.56, + "learning_rate": "1.8691e-04", + "loss": 0.8533, + "slid_loss": 0.8829, + "step": 1625, + "time": 12.98 + }, + { + "epoch": 1.56, + "learning_rate": "1.8689e-04", + "loss": 0.848, + "slid_loss": 0.8819, + "step": 1626, + "time": 13.32 + }, + { + "epoch": 1.56, + "learning_rate": "1.8686e-04", + "loss": 0.8651, + "slid_loss": 0.881, + "step": 1627, + "time": 14.15 + }, + { + "epoch": 1.56, + "learning_rate": "1.8684e-04", + "loss": 0.8467, + "slid_loss": 0.8813, + "step": 1628, + "time": 13.18 + }, + { + "epoch": 1.56, + "learning_rate": "1.8682e-04", + "loss": 0.8886, + "slid_loss": 0.882, + "step": 1629, + "time": 12.92 + }, + { + "epoch": 1.57, + "learning_rate": "1.8679e-04", + "loss": 0.8826, + "slid_loss": 0.8813, + "step": 1630, + "time": 13.0 + }, + { + "epoch": 1.57, + "learning_rate": "1.8677e-04", + "loss": 0.9009, + "slid_loss": 0.8815, + "step": 1631, + 
"time": 12.98 + }, + { + "epoch": 1.57, + "learning_rate": "1.8675e-04", + "loss": 0.95, + "slid_loss": 0.8818, + "step": 1632, + "time": 11.58 + }, + { + "epoch": 1.57, + "learning_rate": "1.8673e-04", + "loss": 0.8361, + "slid_loss": 0.8817, + "step": 1633, + "time": 11.95 + }, + { + "epoch": 1.57, + "learning_rate": "1.8670e-04", + "loss": 0.9011, + "slid_loss": 0.8808, + "step": 1634, + "time": 12.88 + }, + { + "epoch": 1.57, + "learning_rate": "1.8668e-04", + "loss": 0.9022, + "slid_loss": 0.8811, + "step": 1635, + "time": 13.86 + }, + { + "epoch": 1.57, + "learning_rate": "1.8666e-04", + "loss": 0.8486, + "slid_loss": 0.881, + "step": 1636, + "time": 12.95 + }, + { + "epoch": 1.57, + "learning_rate": "1.8664e-04", + "loss": 0.8239, + "slid_loss": 0.8801, + "step": 1637, + "time": 12.12 + }, + { + "epoch": 1.57, + "learning_rate": "1.8661e-04", + "loss": 0.9273, + "slid_loss": 0.8802, + "step": 1638, + "time": 14.12 + }, + { + "epoch": 1.57, + "learning_rate": "1.8659e-04", + "loss": 0.9531, + "slid_loss": 0.8816, + "step": 1639, + "time": 13.53 + }, + { + "epoch": 1.58, + "learning_rate": "1.8657e-04", + "loss": 0.9465, + "slid_loss": 0.8808, + "step": 1640, + "time": 12.78 + }, + { + "epoch": 1.58, + "learning_rate": "1.8654e-04", + "loss": 0.8981, + "slid_loss": 0.8798, + "step": 1641, + "time": 13.58 + }, + { + "epoch": 1.58, + "learning_rate": "1.8652e-04", + "loss": 0.9054, + "slid_loss": 0.8804, + "step": 1642, + "time": 14.65 + }, + { + "epoch": 1.58, + "learning_rate": "1.8650e-04", + "loss": 0.8536, + "slid_loss": 0.8803, + "step": 1643, + "time": 13.11 + }, + { + "epoch": 1.58, + "learning_rate": "1.8648e-04", + "loss": 0.7797, + "slid_loss": 0.8795, + "step": 1644, + "time": 12.19 + }, + { + "epoch": 1.58, + "learning_rate": "1.8645e-04", + "loss": 1.003, + "slid_loss": 0.88, + "step": 1645, + "time": 12.85 + }, + { + "epoch": 1.58, + "learning_rate": "1.8643e-04", + "loss": 0.9013, + "slid_loss": 0.8808, + "step": 1646, + "time": 13.75 + }, + { + 
"epoch": 1.58, + "learning_rate": "1.8641e-04", + "loss": 0.8613, + "slid_loss": 0.8807, + "step": 1647, + "time": 11.27 + }, + { + "epoch": 1.58, + "learning_rate": "1.8638e-04", + "loss": 0.8331, + "slid_loss": 0.8806, + "step": 1648, + "time": 13.3 + }, + { + "epoch": 1.58, + "learning_rate": "1.8636e-04", + "loss": 0.8033, + "slid_loss": 0.8803, + "step": 1649, + "time": 12.91 + }, + { + "epoch": 1.59, + "learning_rate": "1.8634e-04", + "loss": 0.7966, + "slid_loss": 0.8794, + "step": 1650, + "time": 11.78 + }, + { + "epoch": 1.59, + "learning_rate": "1.8631e-04", + "loss": 0.7707, + "slid_loss": 0.8783, + "step": 1651, + "time": 12.9 + }, + { + "epoch": 1.59, + "learning_rate": "1.8629e-04", + "loss": 0.8721, + "slid_loss": 0.8789, + "step": 1652, + "time": 13.71 + }, + { + "epoch": 1.59, + "learning_rate": "1.8627e-04", + "loss": 0.7965, + "slid_loss": 0.878, + "step": 1653, + "time": 13.15 + }, + { + "epoch": 1.59, + "learning_rate": "1.8624e-04", + "loss": 0.9431, + "slid_loss": 0.8788, + "step": 1654, + "time": 11.51 + }, + { + "epoch": 1.59, + "learning_rate": "1.8622e-04", + "loss": 0.7466, + "slid_loss": 0.8775, + "step": 1655, + "time": 13.2 + }, + { + "epoch": 1.59, + "learning_rate": "1.8620e-04", + "loss": 0.9377, + "slid_loss": 0.8773, + "step": 1656, + "time": 13.47 + }, + { + "epoch": 1.59, + "learning_rate": "1.8618e-04", + "loss": 0.8826, + "slid_loss": 0.8765, + "step": 1657, + "time": 13.63 + }, + { + "epoch": 1.59, + "learning_rate": "1.8615e-04", + "loss": 0.9438, + "slid_loss": 0.878, + "step": 1658, + "time": 13.21 + }, + { + "epoch": 1.59, + "learning_rate": "1.8613e-04", + "loss": 0.8679, + "slid_loss": 0.8788, + "step": 1659, + "time": 13.78 + }, + { + "epoch": 1.59, + "learning_rate": "1.8611e-04", + "loss": 0.8157, + "slid_loss": 0.8786, + "step": 1660, + "time": 13.09 + }, + { + "epoch": 1.6, + "learning_rate": "1.8608e-04", + "loss": 0.8443, + "slid_loss": 0.8785, + "step": 1661, + "time": 12.92 + }, + { + "epoch": 1.6, + 
"learning_rate": "1.8606e-04", + "loss": 0.8592, + "slid_loss": 0.8783, + "step": 1662, + "time": 13.51 + }, + { + "epoch": 1.6, + "learning_rate": "1.8604e-04", + "loss": 0.8848, + "slid_loss": 0.8778, + "step": 1663, + "time": 13.81 + }, + { + "epoch": 1.6, + "learning_rate": "1.8601e-04", + "loss": 0.8622, + "slid_loss": 0.8784, + "step": 1664, + "time": 13.46 + }, + { + "epoch": 1.6, + "learning_rate": "1.8599e-04", + "loss": 0.9053, + "slid_loss": 0.8785, + "step": 1665, + "time": 14.04 + }, + { + "epoch": 1.6, + "learning_rate": "1.8597e-04", + "loss": 0.7656, + "slid_loss": 0.8766, + "step": 1666, + "time": 11.16 + }, + { + "epoch": 1.6, + "learning_rate": "1.8594e-04", + "loss": 0.9854, + "slid_loss": 0.8778, + "step": 1667, + "time": 13.35 + }, + { + "epoch": 1.6, + "learning_rate": "1.8592e-04", + "loss": 0.8367, + "slid_loss": 0.8769, + "step": 1668, + "time": 11.99 + }, + { + "epoch": 1.6, + "learning_rate": "1.8590e-04", + "loss": 0.8438, + "slid_loss": 0.8761, + "step": 1669, + "time": 12.97 + }, + { + "epoch": 1.6, + "learning_rate": "1.8587e-04", + "loss": 1.0256, + "slid_loss": 0.8781, + "step": 1670, + "time": 12.88 + }, + { + "epoch": 1.61, + "learning_rate": "1.8585e-04", + "loss": 0.956, + "slid_loss": 0.8784, + "step": 1671, + "time": 12.96 + }, + { + "epoch": 1.61, + "learning_rate": "1.8583e-04", + "loss": 0.8589, + "slid_loss": 0.8787, + "step": 1672, + "time": 11.62 + }, + { + "epoch": 1.61, + "learning_rate": "1.8580e-04", + "loss": 0.8256, + "slid_loss": 0.8786, + "step": 1673, + "time": 10.74 + }, + { + "epoch": 1.61, + "learning_rate": "1.8578e-04", + "loss": 0.7996, + "slid_loss": 0.8783, + "step": 1674, + "time": 13.82 + }, + { + "epoch": 1.61, + "learning_rate": "1.8576e-04", + "loss": 0.9398, + "slid_loss": 0.8786, + "step": 1675, + "time": 13.2 + }, + { + "epoch": 1.61, + "learning_rate": "1.8573e-04", + "loss": 0.8629, + "slid_loss": 0.8781, + "step": 1676, + "time": 12.08 + }, + { + "epoch": 1.61, + "learning_rate": 
"1.8571e-04", + "loss": 0.9217, + "slid_loss": 0.8791, + "step": 1677, + "time": 13.48 + }, + { + "epoch": 1.61, + "learning_rate": "1.8569e-04", + "loss": 0.7856, + "slid_loss": 0.8779, + "step": 1678, + "time": 11.48 + }, + { + "epoch": 1.61, + "learning_rate": "1.8566e-04", + "loss": 0.8381, + "slid_loss": 0.8778, + "step": 1679, + "time": 12.18 + }, + { + "epoch": 1.61, + "learning_rate": "1.8564e-04", + "loss": 0.7912, + "slid_loss": 0.8776, + "step": 1680, + "time": 11.3 + }, + { + "epoch": 1.61, + "learning_rate": "1.8562e-04", + "loss": 0.7995, + "slid_loss": 0.8764, + "step": 1681, + "time": 13.67 + }, + { + "epoch": 1.62, + "learning_rate": "1.8559e-04", + "loss": 0.8343, + "slid_loss": 0.8748, + "step": 1682, + "time": 13.6 + }, + { + "epoch": 1.62, + "learning_rate": "1.8557e-04", + "loss": 0.8654, + "slid_loss": 0.874, + "step": 1683, + "time": 13.31 + }, + { + "epoch": 1.62, + "learning_rate": "1.8554e-04", + "loss": 0.8979, + "slid_loss": 0.8743, + "step": 1684, + "time": 12.49 + }, + { + "epoch": 1.62, + "learning_rate": "1.8552e-04", + "loss": 0.8969, + "slid_loss": 0.8752, + "step": 1685, + "time": 13.75 + }, + { + "epoch": 1.62, + "learning_rate": "1.8550e-04", + "loss": 0.8643, + "slid_loss": 0.8749, + "step": 1686, + "time": 13.34 + }, + { + "epoch": 1.62, + "learning_rate": "1.8547e-04", + "loss": 0.9115, + "slid_loss": 0.876, + "step": 1687, + "time": 13.54 + }, + { + "epoch": 1.62, + "learning_rate": "1.8545e-04", + "loss": 0.7537, + "slid_loss": 0.8742, + "step": 1688, + "time": 12.99 + }, + { + "epoch": 1.62, + "learning_rate": "1.8543e-04", + "loss": 0.834, + "slid_loss": 0.8726, + "step": 1689, + "time": 13.12 + }, + { + "epoch": 1.62, + "learning_rate": "1.8540e-04", + "loss": 0.7974, + "slid_loss": 0.8719, + "step": 1690, + "time": 12.74 + }, + { + "epoch": 1.62, + "learning_rate": "1.8538e-04", + "loss": 0.9127, + "slid_loss": 0.8713, + "step": 1691, + "time": 13.52 + }, + { + "epoch": 1.63, + "learning_rate": "1.8536e-04", + "loss": 
0.8939, + "slid_loss": 0.8718, + "step": 1692, + "time": 11.4 + }, + { + "epoch": 1.63, + "learning_rate": "1.8533e-04", + "loss": 0.863, + "slid_loss": 0.8717, + "step": 1693, + "time": 13.52 + }, + { + "epoch": 1.63, + "learning_rate": "1.8531e-04", + "loss": 0.9842, + "slid_loss": 0.8728, + "step": 1694, + "time": 13.17 + }, + { + "epoch": 1.63, + "learning_rate": "1.8528e-04", + "loss": 0.8556, + "slid_loss": 0.872, + "step": 1695, + "time": 13.38 + }, + { + "epoch": 1.63, + "learning_rate": "1.8526e-04", + "loss": 0.8397, + "slid_loss": 0.8727, + "step": 1696, + "time": 13.26 + }, + { + "epoch": 1.63, + "learning_rate": "1.8524e-04", + "loss": 0.9243, + "slid_loss": 0.8739, + "step": 1697, + "time": 13.7 + }, + { + "epoch": 1.63, + "learning_rate": "1.8521e-04", + "loss": 0.9987, + "slid_loss": 0.8749, + "step": 1698, + "time": 13.79 + }, + { + "epoch": 1.63, + "learning_rate": "1.8519e-04", + "loss": 0.9109, + "slid_loss": 0.8753, + "step": 1699, + "time": 13.71 + }, + { + "epoch": 1.63, + "learning_rate": "1.8517e-04", + "loss": 0.7638, + "slid_loss": 0.8738, + "step": 1700, + "time": 11.29 + }, + { + "epoch": 1.63, + "learning_rate": "1.8514e-04", + "loss": 0.9244, + "slid_loss": 0.8736, + "step": 1701, + "time": 13.82 + }, + { + "epoch": 1.63, + "learning_rate": "1.8512e-04", + "loss": 0.8626, + "slid_loss": 0.8734, + "step": 1702, + "time": 11.57 + }, + { + "epoch": 1.64, + "learning_rate": "1.8509e-04", + "loss": 0.9134, + "slid_loss": 0.8723, + "step": 1703, + "time": 13.7 + }, + { + "epoch": 1.64, + "learning_rate": "1.8507e-04", + "loss": 0.8617, + "slid_loss": 0.8721, + "step": 1704, + "time": 13.46 + }, + { + "epoch": 1.64, + "learning_rate": "1.8505e-04", + "loss": 0.9085, + "slid_loss": 0.8717, + "step": 1705, + "time": 14.01 + }, + { + "epoch": 1.64, + "learning_rate": "1.8502e-04", + "loss": 0.8481, + "slid_loss": 0.8713, + "step": 1706, + "time": 14.04 + }, + { + "epoch": 1.64, + "learning_rate": "1.8500e-04", + "loss": 0.8989, + "slid_loss": 
0.8717, + "step": 1707, + "time": 13.21 + }, + { + "epoch": 1.64, + "learning_rate": "1.8497e-04", + "loss": 0.8927, + "slid_loss": 0.871, + "step": 1708, + "time": 14.23 + }, + { + "epoch": 1.64, + "learning_rate": "1.8495e-04", + "loss": 0.7777, + "slid_loss": 0.8698, + "step": 1709, + "time": 12.36 + }, + { + "epoch": 1.64, + "learning_rate": "1.8493e-04", + "loss": 0.8989, + "slid_loss": 0.8702, + "step": 1710, + "time": 13.14 + }, + { + "epoch": 1.64, + "learning_rate": "1.8490e-04", + "loss": 0.8343, + "slid_loss": 0.8684, + "step": 1711, + "time": 10.76 + }, + { + "epoch": 1.64, + "learning_rate": "1.8488e-04", + "loss": 0.8807, + "slid_loss": 0.8692, + "step": 1712, + "time": 14.31 + }, + { + "epoch": 1.65, + "learning_rate": "1.8485e-04", + "loss": 0.8262, + "slid_loss": 0.8674, + "step": 1713, + "time": 13.11 + }, + { + "epoch": 1.65, + "learning_rate": "1.8483e-04", + "loss": 0.9448, + "slid_loss": 0.8687, + "step": 1714, + "time": 13.5 + }, + { + "epoch": 1.65, + "learning_rate": "1.8481e-04", + "loss": 0.8627, + "slid_loss": 0.8683, + "step": 1715, + "time": 13.46 + }, + { + "epoch": 1.65, + "learning_rate": "1.8478e-04", + "loss": 0.8639, + "slid_loss": 0.8692, + "step": 1716, + "time": 12.35 + }, + { + "epoch": 1.65, + "learning_rate": "1.8476e-04", + "loss": 0.8812, + "slid_loss": 0.869, + "step": 1717, + "time": 11.79 + }, + { + "epoch": 1.65, + "learning_rate": "1.8473e-04", + "loss": 0.7212, + "slid_loss": 0.8682, + "step": 1718, + "time": 11.13 + }, + { + "epoch": 1.65, + "learning_rate": "1.8471e-04", + "loss": 0.8801, + "slid_loss": 0.8685, + "step": 1719, + "time": 13.02 + }, + { + "epoch": 1.65, + "learning_rate": "1.8469e-04", + "loss": 0.8134, + "slid_loss": 0.869, + "step": 1720, + "time": 13.4 + }, + { + "epoch": 1.65, + "learning_rate": "1.8466e-04", + "loss": 0.8531, + "slid_loss": 0.8688, + "step": 1721, + "time": 14.14 + }, + { + "epoch": 1.65, + "learning_rate": "1.8464e-04", + "loss": 0.9186, + "slid_loss": 0.8685, + "step": 1722, 
+ "time": 13.58 + }, + { + "epoch": 1.66, + "learning_rate": "1.8461e-04", + "loss": 0.8145, + "slid_loss": 0.8688, + "step": 1723, + "time": 11.04 + }, + { + "epoch": 1.66, + "learning_rate": "1.8459e-04", + "loss": 0.8877, + "slid_loss": 0.8695, + "step": 1724, + "time": 13.58 + }, + { + "epoch": 1.66, + "learning_rate": "1.8456e-04", + "loss": 0.872, + "slid_loss": 0.8697, + "step": 1725, + "time": 13.6 + }, + { + "epoch": 1.66, + "learning_rate": "1.8454e-04", + "loss": 0.9254, + "slid_loss": 0.8705, + "step": 1726, + "time": 12.78 + }, + { + "epoch": 1.66, + "learning_rate": "1.8452e-04", + "loss": 0.8998, + "slid_loss": 0.8708, + "step": 1727, + "time": 12.04 + }, + { + "epoch": 1.66, + "learning_rate": "1.8449e-04", + "loss": 0.8893, + "slid_loss": 0.8712, + "step": 1728, + "time": 11.42 + }, + { + "epoch": 1.66, + "learning_rate": "1.8447e-04", + "loss": 0.8752, + "slid_loss": 0.8711, + "step": 1729, + "time": 14.03 + }, + { + "epoch": 1.66, + "learning_rate": "1.8444e-04", + "loss": 0.8574, + "slid_loss": 0.8708, + "step": 1730, + "time": 13.53 + }, + { + "epoch": 1.66, + "learning_rate": "1.8442e-04", + "loss": 0.7861, + "slid_loss": 0.8697, + "step": 1731, + "time": 12.95 + }, + { + "epoch": 1.66, + "learning_rate": "1.8439e-04", + "loss": 0.8442, + "slid_loss": 0.8686, + "step": 1732, + "time": 14.15 + }, + { + "epoch": 1.66, + "learning_rate": "1.8437e-04", + "loss": 0.9104, + "slid_loss": 0.8694, + "step": 1733, + "time": 13.7 + }, + { + "epoch": 1.67, + "learning_rate": "1.8435e-04", + "loss": 0.8555, + "slid_loss": 0.8689, + "step": 1734, + "time": 13.5 + }, + { + "epoch": 1.67, + "learning_rate": "1.8432e-04", + "loss": 0.9634, + "slid_loss": 0.8695, + "step": 1735, + "time": 12.9 + }, + { + "epoch": 1.67, + "learning_rate": "1.8430e-04", + "loss": 0.8647, + "slid_loss": 0.8697, + "step": 1736, + "time": 12.25 + }, + { + "epoch": 1.67, + "learning_rate": "1.8427e-04", + "loss": 0.9067, + "slid_loss": 0.8705, + "step": 1737, + "time": 13.61 + }, + { 
+ "epoch": 1.67, + "learning_rate": "1.8425e-04", + "loss": 0.9001, + "slid_loss": 0.8703, + "step": 1738, + "time": 13.16 + }, + { + "epoch": 1.67, + "learning_rate": "1.8422e-04", + "loss": 0.7648, + "slid_loss": 0.8684, + "step": 1739, + "time": 13.9 + }, + { + "epoch": 1.67, + "learning_rate": "1.8420e-04", + "loss": 0.8603, + "slid_loss": 0.8675, + "step": 1740, + "time": 12.73 + }, + { + "epoch": 1.67, + "learning_rate": "1.8417e-04", + "loss": 0.8236, + "slid_loss": 0.8668, + "step": 1741, + "time": 13.83 + }, + { + "epoch": 1.67, + "learning_rate": "1.8415e-04", + "loss": 0.8864, + "slid_loss": 0.8666, + "step": 1742, + "time": 13.61 + }, + { + "epoch": 1.67, + "learning_rate": "1.8413e-04", + "loss": 0.8332, + "slid_loss": 0.8664, + "step": 1743, + "time": 12.19 + }, + { + "epoch": 1.68, + "learning_rate": "1.8410e-04", + "loss": 0.8174, + "slid_loss": 0.8667, + "step": 1744, + "time": 13.5 + }, + { + "epoch": 1.68, + "learning_rate": "1.8408e-04", + "loss": 0.8142, + "slid_loss": 0.8649, + "step": 1745, + "time": 13.75 + }, + { + "epoch": 1.68, + "learning_rate": "1.8405e-04", + "loss": 0.9411, + "slid_loss": 0.8653, + "step": 1746, + "time": 13.99 + }, + { + "epoch": 1.68, + "learning_rate": "1.8403e-04", + "loss": 0.915, + "slid_loss": 0.8658, + "step": 1747, + "time": 12.93 + }, + { + "epoch": 1.68, + "learning_rate": "1.8400e-04", + "loss": 0.8585, + "slid_loss": 0.866, + "step": 1748, + "time": 13.43 + }, + { + "epoch": 1.68, + "learning_rate": "1.8398e-04", + "loss": 0.8574, + "slid_loss": 0.8666, + "step": 1749, + "time": 11.74 + }, + { + "epoch": 1.68, + "learning_rate": "1.8395e-04", + "loss": 0.7164, + "slid_loss": 0.8658, + "step": 1750, + "time": 12.9 + }, + { + "epoch": 1.68, + "learning_rate": "1.8393e-04", + "loss": 0.8656, + "slid_loss": 0.8667, + "step": 1751, + "time": 13.71 + }, + { + "epoch": 1.68, + "learning_rate": "1.8390e-04", + "loss": 0.8621, + "slid_loss": 0.8666, + "step": 1752, + "time": 13.45 + }, + { + "epoch": 1.68, + 
"learning_rate": "1.8388e-04", + "loss": 0.8397, + "slid_loss": 0.8671, + "step": 1753, + "time": 12.36 + }, + { + "epoch": 1.68, + "learning_rate": "1.8385e-04", + "loss": 0.9576, + "slid_loss": 0.8672, + "step": 1754, + "time": 13.58 + }, + { + "epoch": 1.69, + "learning_rate": "1.8383e-04", + "loss": 0.8619, + "slid_loss": 0.8684, + "step": 1755, + "time": 13.49 + }, + { + "epoch": 1.69, + "learning_rate": "1.8381e-04", + "loss": 0.9332, + "slid_loss": 0.8683, + "step": 1756, + "time": 13.35 + }, + { + "epoch": 1.69, + "learning_rate": "1.8378e-04", + "loss": 0.8423, + "slid_loss": 0.8679, + "step": 1757, + "time": 13.26 + }, + { + "epoch": 1.69, + "learning_rate": "1.8376e-04", + "loss": 0.8339, + "slid_loss": 0.8668, + "step": 1758, + "time": 14.07 + }, + { + "epoch": 1.69, + "learning_rate": "1.8373e-04", + "loss": 0.9393, + "slid_loss": 0.8675, + "step": 1759, + "time": 13.87 + }, + { + "epoch": 1.69, + "learning_rate": "1.8371e-04", + "loss": 0.8294, + "slid_loss": 0.8677, + "step": 1760, + "time": 11.34 + }, + { + "epoch": 1.69, + "learning_rate": "1.8368e-04", + "loss": 0.9034, + "slid_loss": 0.8683, + "step": 1761, + "time": 13.3 + }, + { + "epoch": 1.69, + "learning_rate": "1.8366e-04", + "loss": 0.7987, + "slid_loss": 0.8677, + "step": 1762, + "time": 13.5 + }, + { + "epoch": 1.69, + "learning_rate": "1.8363e-04", + "loss": 0.8401, + "slid_loss": 0.8672, + "step": 1763, + "time": 12.94 + }, + { + "epoch": 1.69, + "learning_rate": "1.8361e-04", + "loss": 0.8723, + "slid_loss": 0.8673, + "step": 1764, + "time": 12.75 + }, + { + "epoch": 1.7, + "learning_rate": "1.8358e-04", + "loss": 0.8458, + "slid_loss": 0.8667, + "step": 1765, + "time": 13.17 + }, + { + "epoch": 1.7, + "learning_rate": "1.8356e-04", + "loss": 0.8551, + "slid_loss": 0.8676, + "step": 1766, + "time": 13.9 + }, + { + "epoch": 1.7, + "learning_rate": "1.8353e-04", + "loss": 0.8023, + "slid_loss": 0.8658, + "step": 1767, + "time": 13.46 + }, + { + "epoch": 1.7, + "learning_rate": 
"1.8351e-04", + "loss": 0.835, + "slid_loss": 0.8658, + "step": 1768, + "time": 11.88 + }, + { + "epoch": 1.7, + "learning_rate": "1.8348e-04", + "loss": 0.9774, + "slid_loss": 0.8671, + "step": 1769, + "time": 12.77 + }, + { + "epoch": 1.7, + "learning_rate": "1.8346e-04", + "loss": 0.8232, + "slid_loss": 0.8651, + "step": 1770, + "time": 13.94 + }, + { + "epoch": 1.7, + "learning_rate": "1.8343e-04", + "loss": 0.8366, + "slid_loss": 0.8639, + "step": 1771, + "time": 13.43 + }, + { + "epoch": 1.7, + "learning_rate": "1.8341e-04", + "loss": 0.8329, + "slid_loss": 0.8636, + "step": 1772, + "time": 10.84 + }, + { + "epoch": 1.7, + "learning_rate": "1.8338e-04", + "loss": 0.8345, + "slid_loss": 0.8637, + "step": 1773, + "time": 13.95 + }, + { + "epoch": 1.7, + "learning_rate": "1.8336e-04", + "loss": 0.8691, + "slid_loss": 0.8644, + "step": 1774, + "time": 12.97 + }, + { + "epoch": 1.71, + "learning_rate": "1.8333e-04", + "loss": 0.8222, + "slid_loss": 0.8632, + "step": 1775, + "time": 13.77 + }, + { + "epoch": 1.71, + "learning_rate": "1.8331e-04", + "loss": 0.8095, + "slid_loss": 0.8627, + "step": 1776, + "time": 14.3 + }, + { + "epoch": 1.71, + "learning_rate": "1.8328e-04", + "loss": 0.8935, + "slid_loss": 0.8624, + "step": 1777, + "time": 13.75 + }, + { + "epoch": 1.71, + "learning_rate": "1.8326e-04", + "loss": 0.902, + "slid_loss": 0.8636, + "step": 1778, + "time": 13.11 + }, + { + "epoch": 1.71, + "learning_rate": "1.8323e-04", + "loss": 0.8801, + "slid_loss": 0.864, + "step": 1779, + "time": 11.65 + }, + { + "epoch": 1.71, + "learning_rate": "1.8321e-04", + "loss": 0.9188, + "slid_loss": 0.8653, + "step": 1780, + "time": 13.41 + }, + { + "epoch": 1.71, + "learning_rate": "1.8318e-04", + "loss": 0.8929, + "slid_loss": 0.8662, + "step": 1781, + "time": 13.97 + }, + { + "epoch": 1.71, + "learning_rate": "1.8316e-04", + "loss": 0.837, + "slid_loss": 0.8662, + "step": 1782, + "time": 12.83 + }, + { + "epoch": 1.71, + "learning_rate": "1.8313e-04", + "loss": 
0.7895, + "slid_loss": 0.8655, + "step": 1783, + "time": 13.1 + }, + { + "epoch": 1.71, + "learning_rate": "1.8311e-04", + "loss": 0.8354, + "slid_loss": 0.8648, + "step": 1784, + "time": 13.27 + }, + { + "epoch": 1.71, + "learning_rate": "1.8308e-04", + "loss": 0.8624, + "slid_loss": 0.8645, + "step": 1785, + "time": 12.81 + }, + { + "epoch": 1.72, + "learning_rate": "1.8306e-04", + "loss": 0.7806, + "slid_loss": 0.8637, + "step": 1786, + "time": 12.92 + }, + { + "epoch": 1.72, + "learning_rate": "1.8303e-04", + "loss": 0.8302, + "slid_loss": 0.8629, + "step": 1787, + "time": 13.83 + }, + { + "epoch": 1.72, + "learning_rate": "1.8301e-04", + "loss": 0.8791, + "slid_loss": 0.8641, + "step": 1788, + "time": 11.45 + }, + { + "epoch": 1.72, + "learning_rate": "1.8298e-04", + "loss": 0.8754, + "slid_loss": 0.8645, + "step": 1789, + "time": 14.2 + }, + { + "epoch": 1.72, + "learning_rate": "1.8296e-04", + "loss": 0.8404, + "slid_loss": 0.865, + "step": 1790, + "time": 11.77 + }, + { + "epoch": 1.72, + "learning_rate": "1.8293e-04", + "loss": 0.7928, + "slid_loss": 0.8638, + "step": 1791, + "time": 13.19 + }, + { + "epoch": 1.72, + "learning_rate": "1.8291e-04", + "loss": 0.8119, + "slid_loss": 0.8629, + "step": 1792, + "time": 13.43 + }, + { + "epoch": 1.72, + "learning_rate": "1.8288e-04", + "loss": 0.8285, + "slid_loss": 0.8626, + "step": 1793, + "time": 13.36 + }, + { + "epoch": 1.72, + "learning_rate": "1.8286e-04", + "loss": 0.8777, + "slid_loss": 0.8615, + "step": 1794, + "time": 14.13 + }, + { + "epoch": 1.72, + "learning_rate": "1.8283e-04", + "loss": 0.8875, + "slid_loss": 0.8618, + "step": 1795, + "time": 12.82 + }, + { + "epoch": 1.73, + "learning_rate": "1.8281e-04", + "loss": 0.8827, + "slid_loss": 0.8623, + "step": 1796, + "time": 13.38 + }, + { + "epoch": 1.73, + "learning_rate": "1.8278e-04", + "loss": 0.9082, + "slid_loss": 0.8621, + "step": 1797, + "time": 13.75 + }, + { + "epoch": 1.73, + "learning_rate": "1.8275e-04", + "loss": 0.8735, + "slid_loss": 
0.8609, + "step": 1798, + "time": 13.73 + }, + { + "epoch": 1.73, + "learning_rate": "1.8273e-04", + "loss": 0.9085, + "slid_loss": 0.8608, + "step": 1799, + "time": 13.59 + }, + { + "epoch": 1.73, + "learning_rate": "1.8270e-04", + "loss": 0.8658, + "slid_loss": 0.8619, + "step": 1800, + "time": 13.54 + }, + { + "epoch": 1.73, + "learning_rate": "1.8268e-04", + "loss": 0.8491, + "slid_loss": 0.8611, + "step": 1801, + "time": 13.01 + }, + { + "epoch": 1.73, + "learning_rate": "1.8265e-04", + "loss": 0.7872, + "slid_loss": 0.8604, + "step": 1802, + "time": 13.43 + }, + { + "epoch": 1.73, + "learning_rate": "1.8263e-04", + "loss": 0.7199, + "slid_loss": 0.8584, + "step": 1803, + "time": 13.35 + }, + { + "epoch": 1.73, + "learning_rate": "1.8260e-04", + "loss": 0.9142, + "slid_loss": 0.8589, + "step": 1804, + "time": 13.22 + }, + { + "epoch": 1.73, + "learning_rate": "1.8258e-04", + "loss": 0.9352, + "slid_loss": 0.8592, + "step": 1805, + "time": 11.36 + }, + { + "epoch": 1.73, + "learning_rate": "1.8255e-04", + "loss": 0.8022, + "slid_loss": 0.8587, + "step": 1806, + "time": 13.58 + }, + { + "epoch": 1.74, + "learning_rate": "1.8253e-04", + "loss": 0.8291, + "slid_loss": 0.8581, + "step": 1807, + "time": 15.22 + }, + { + "epoch": 1.74, + "learning_rate": "1.8250e-04", + "loss": 0.9208, + "slid_loss": 0.8583, + "step": 1808, + "time": 13.86 + }, + { + "epoch": 1.74, + "learning_rate": "1.8247e-04", + "loss": 0.8905, + "slid_loss": 0.8595, + "step": 1809, + "time": 12.11 + }, + { + "epoch": 1.74, + "learning_rate": "1.8245e-04", + "loss": 0.7648, + "slid_loss": 0.8581, + "step": 1810, + "time": 13.09 + }, + { + "epoch": 1.74, + "learning_rate": "1.8242e-04", + "loss": 0.8795, + "slid_loss": 0.8586, + "step": 1811, + "time": 13.86 + }, + { + "epoch": 1.74, + "learning_rate": "1.8240e-04", + "loss": 0.869, + "slid_loss": 0.8585, + "step": 1812, + "time": 13.88 + }, + { + "epoch": 1.74, + "learning_rate": "1.8237e-04", + "loss": 0.8434, + "slid_loss": 0.8586, + "step": 
1813, + "time": 12.78 + }, + { + "epoch": 1.74, + "learning_rate": "1.8235e-04", + "loss": 0.8733, + "slid_loss": 0.8579, + "step": 1814, + "time": 13.93 + }, + { + "epoch": 1.74, + "learning_rate": "1.8232e-04", + "loss": 0.8318, + "slid_loss": 0.8576, + "step": 1815, + "time": 13.92 + }, + { + "epoch": 1.74, + "learning_rate": "1.8230e-04", + "loss": 0.8529, + "slid_loss": 0.8575, + "step": 1816, + "time": 13.88 + }, + { + "epoch": 1.75, + "learning_rate": "1.8227e-04", + "loss": 0.8967, + "slid_loss": 0.8576, + "step": 1817, + "time": 12.02 + }, + { + "epoch": 1.75, + "learning_rate": "1.8224e-04", + "loss": 0.8689, + "slid_loss": 0.8591, + "step": 1818, + "time": 12.4 + }, + { + "epoch": 1.75, + "learning_rate": "1.8222e-04", + "loss": 0.8187, + "slid_loss": 0.8585, + "step": 1819, + "time": 12.1 + }, + { + "epoch": 1.75, + "learning_rate": "1.8219e-04", + "loss": 0.9251, + "slid_loss": 0.8596, + "step": 1820, + "time": 13.55 + }, + { + "epoch": 1.75, + "learning_rate": "1.8217e-04", + "loss": 0.7818, + "slid_loss": 0.8589, + "step": 1821, + "time": 13.58 + }, + { + "epoch": 1.75, + "learning_rate": "1.8214e-04", + "loss": 0.8169, + "slid_loss": 0.8579, + "step": 1822, + "time": 13.59 + }, + { + "epoch": 1.75, + "learning_rate": "1.8212e-04", + "loss": 0.8767, + "slid_loss": 0.8585, + "step": 1823, + "time": 12.17 + }, + { + "epoch": 1.75, + "learning_rate": "1.8209e-04", + "loss": 0.7161, + "slid_loss": 0.8568, + "step": 1824, + "time": 11.85 + }, + { + "epoch": 1.75, + "learning_rate": "1.8207e-04", + "loss": 0.8245, + "slid_loss": 0.8563, + "step": 1825, + "time": 12.41 + }, + { + "epoch": 1.75, + "learning_rate": "1.8204e-04", + "loss": 0.8785, + "slid_loss": 0.8559, + "step": 1826, + "time": 14.05 + }, + { + "epoch": 1.76, + "learning_rate": "1.8201e-04", + "loss": 0.7711, + "slid_loss": 0.8546, + "step": 1827, + "time": 14.38 + }, + { + "epoch": 1.76, + "learning_rate": "1.8199e-04", + "loss": 0.8264, + "slid_loss": 0.8539, + "step": 1828, + "time": 13.79 
+ }, + { + "epoch": 1.76, + "learning_rate": "1.8196e-04", + "loss": 0.8566, + "slid_loss": 0.8538, + "step": 1829, + "time": 13.01 + }, + { + "epoch": 1.76, + "learning_rate": "1.8194e-04", + "loss": 0.8851, + "slid_loss": 0.854, + "step": 1830, + "time": 11.95 + }, + { + "epoch": 1.76, + "learning_rate": "1.8191e-04", + "loss": 0.8069, + "slid_loss": 0.8542, + "step": 1831, + "time": 11.2 + }, + { + "epoch": 1.76, + "learning_rate": "1.8188e-04", + "loss": 0.8387, + "slid_loss": 0.8542, + "step": 1832, + "time": 13.93 + }, + { + "epoch": 1.76, + "learning_rate": "1.8186e-04", + "loss": 0.7812, + "slid_loss": 0.8529, + "step": 1833, + "time": 11.98 + }, + { + "epoch": 1.76, + "learning_rate": "1.8183e-04", + "loss": 0.8376, + "slid_loss": 0.8527, + "step": 1834, + "time": 13.66 + }, + { + "epoch": 1.76, + "learning_rate": "1.8181e-04", + "loss": 0.9116, + "slid_loss": 0.8522, + "step": 1835, + "time": 13.73 + }, + { + "epoch": 1.76, + "learning_rate": "1.8178e-04", + "loss": 0.9657, + "slid_loss": 0.8532, + "step": 1836, + "time": 12.8 + }, + { + "epoch": 1.76, + "learning_rate": "1.8176e-04", + "loss": 0.9019, + "slid_loss": 0.8532, + "step": 1837, + "time": 13.46 + }, + { + "epoch": 1.77, + "learning_rate": "1.8173e-04", + "loss": 1.1192, + "slid_loss": 0.8554, + "step": 1838, + "time": 12.55 + }, + { + "epoch": 1.77, + "learning_rate": "1.8170e-04", + "loss": 0.8569, + "slid_loss": 0.8563, + "step": 1839, + "time": 13.83 + }, + { + "epoch": 1.77, + "learning_rate": "1.8168e-04", + "loss": 0.8324, + "slid_loss": 0.856, + "step": 1840, + "time": 12.38 + }, + { + "epoch": 1.77, + "learning_rate": "1.8165e-04", + "loss": 0.832, + "slid_loss": 0.8561, + "step": 1841, + "time": 12.03 + }, + { + "epoch": 1.77, + "learning_rate": "1.8163e-04", + "loss": 0.9014, + "slid_loss": 0.8562, + "step": 1842, + "time": 11.96 + }, + { + "epoch": 1.77, + "learning_rate": "1.8160e-04", + "loss": 0.7229, + "slid_loss": 0.8551, + "step": 1843, + "time": 13.64 + }, + { + "epoch": 
1.77, + "learning_rate": "1.8157e-04", + "loss": 0.8013, + "slid_loss": 0.855, + "step": 1844, + "time": 13.78 + }, + { + "epoch": 1.77, + "learning_rate": "1.8155e-04", + "loss": 0.8193, + "slid_loss": 0.855, + "step": 1845, + "time": 12.07 + }, + { + "epoch": 1.77, + "learning_rate": "1.8152e-04", + "loss": 0.8283, + "slid_loss": 0.8539, + "step": 1846, + "time": 12.31 + }, + { + "epoch": 1.77, + "learning_rate": "1.8150e-04", + "loss": 0.8036, + "slid_loss": 0.8528, + "step": 1847, + "time": 14.03 + }, + { + "epoch": 1.78, + "learning_rate": "1.8147e-04", + "loss": 0.7926, + "slid_loss": 0.8521, + "step": 1848, + "time": 13.89 + }, + { + "epoch": 1.78, + "learning_rate": "1.8144e-04", + "loss": 0.8552, + "slid_loss": 0.8521, + "step": 1849, + "time": 13.71 + }, + { + "epoch": 1.78, + "learning_rate": "1.8142e-04", + "loss": 0.8418, + "slid_loss": 0.8533, + "step": 1850, + "time": 13.9 + }, + { + "epoch": 1.78, + "learning_rate": "1.8139e-04", + "loss": 0.8412, + "slid_loss": 0.8531, + "step": 1851, + "time": 13.14 + }, + { + "epoch": 1.78, + "learning_rate": "1.8137e-04", + "loss": 0.8781, + "slid_loss": 0.8533, + "step": 1852, + "time": 13.61 + }, + { + "epoch": 1.78, + "learning_rate": "1.8134e-04", + "loss": 0.9458, + "slid_loss": 0.8543, + "step": 1853, + "time": 14.18 + }, + { + "epoch": 1.78, + "learning_rate": "1.8131e-04", + "loss": 0.7624, + "slid_loss": 0.8524, + "step": 1854, + "time": 13.67 + }, + { + "epoch": 1.78, + "learning_rate": "1.8129e-04", + "loss": 0.8608, + "slid_loss": 0.8524, + "step": 1855, + "time": 12.88 + }, + { + "epoch": 1.78, + "learning_rate": "1.8126e-04", + "loss": 0.8348, + "slid_loss": 0.8514, + "step": 1856, + "time": 13.09 + }, + { + "epoch": 1.78, + "learning_rate": "1.8123e-04", + "loss": 0.7263, + "slid_loss": 0.8502, + "step": 1857, + "time": 13.75 + }, + { + "epoch": 1.78, + "learning_rate": "1.8121e-04", + "loss": 0.8047, + "slid_loss": 0.8499, + "step": 1858, + "time": 13.28 + }, + { + "epoch": 1.79, + 
"learning_rate": "1.8118e-04", + "loss": 0.8424, + "slid_loss": 0.849, + "step": 1859, + "time": 13.67 + }, + { + "epoch": 1.79, + "learning_rate": "1.8116e-04", + "loss": 0.7473, + "slid_loss": 0.8481, + "step": 1860, + "time": 11.47 + }, + { + "epoch": 1.79, + "learning_rate": "1.8113e-04", + "loss": 0.8913, + "slid_loss": 0.848, + "step": 1861, + "time": 13.85 + }, + { + "epoch": 1.79, + "learning_rate": "1.8110e-04", + "loss": 0.8397, + "slid_loss": 0.8484, + "step": 1862, + "time": 13.45 + }, + { + "epoch": 1.79, + "learning_rate": "1.8108e-04", + "loss": 0.7902, + "slid_loss": 0.8479, + "step": 1863, + "time": 12.14 + }, + { + "epoch": 1.79, + "learning_rate": "1.8105e-04", + "loss": 0.8632, + "slid_loss": 0.8478, + "step": 1864, + "time": 13.27 + }, + { + "epoch": 1.79, + "learning_rate": "1.8102e-04", + "loss": 0.885, + "slid_loss": 0.8482, + "step": 1865, + "time": 13.35 + }, + { + "epoch": 1.79, + "learning_rate": "1.8100e-04", + "loss": 0.8427, + "slid_loss": 0.8481, + "step": 1866, + "time": 13.45 + }, + { + "epoch": 1.79, + "learning_rate": "1.8097e-04", + "loss": 0.8384, + "slid_loss": 0.8485, + "step": 1867, + "time": 14.14 + }, + { + "epoch": 1.79, + "learning_rate": "1.8095e-04", + "loss": 0.8286, + "slid_loss": 0.8484, + "step": 1868, + "time": 13.5 + }, + { + "epoch": 1.8, + "learning_rate": "1.8092e-04", + "loss": 0.818, + "slid_loss": 0.8468, + "step": 1869, + "time": 14.14 + }, + { + "epoch": 1.8, + "learning_rate": "1.8089e-04", + "loss": 0.8532, + "slid_loss": 0.8471, + "step": 1870, + "time": 13.24 + }, + { + "epoch": 1.8, + "learning_rate": "1.8087e-04", + "loss": 0.884, + "slid_loss": 0.8476, + "step": 1871, + "time": 12.87 + }, + { + "epoch": 1.8, + "learning_rate": "1.8084e-04", + "loss": 0.948, + "slid_loss": 0.8487, + "step": 1872, + "time": 13.13 + }, + { + "epoch": 1.8, + "learning_rate": "1.8081e-04", + "loss": 0.8873, + "slid_loss": 0.8493, + "step": 1873, + "time": 13.41 + }, + { + "epoch": 1.8, + "learning_rate": "1.8079e-04", + 
"loss": 0.7586, + "slid_loss": 0.8482, + "step": 1874, + "time": 14.2 + }, + { + "epoch": 1.8, + "learning_rate": "1.8076e-04", + "loss": 0.8074, + "slid_loss": 0.848, + "step": 1875, + "time": 12.28 + }, + { + "epoch": 1.8, + "learning_rate": "1.8073e-04", + "loss": 0.8166, + "slid_loss": 0.8481, + "step": 1876, + "time": 12.66 + }, + { + "epoch": 1.8, + "learning_rate": "1.8071e-04", + "loss": 0.859, + "slid_loss": 0.8477, + "step": 1877, + "time": 13.13 + }, + { + "epoch": 1.8, + "learning_rate": "1.8068e-04", + "loss": 0.8403, + "slid_loss": 0.8471, + "step": 1878, + "time": 13.42 + }, + { + "epoch": 1.8, + "learning_rate": "1.8065e-04", + "loss": 0.8395, + "slid_loss": 0.8467, + "step": 1879, + "time": 13.2 + }, + { + "epoch": 1.81, + "learning_rate": "1.8063e-04", + "loss": 0.8476, + "slid_loss": 0.846, + "step": 1880, + "time": 13.59 + }, + { + "epoch": 1.81, + "learning_rate": "1.8060e-04", + "loss": 0.9457, + "slid_loss": 0.8465, + "step": 1881, + "time": 13.65 + }, + { + "epoch": 1.81, + "learning_rate": "1.8058e-04", + "loss": 0.8541, + "slid_loss": 0.8467, + "step": 1882, + "time": 13.57 + }, + { + "epoch": 1.81, + "learning_rate": "1.8055e-04", + "loss": 0.9112, + "slid_loss": 0.8479, + "step": 1883, + "time": 13.72 + }, + { + "epoch": 1.81, + "learning_rate": "1.8052e-04", + "loss": 0.9136, + "slid_loss": 0.8487, + "step": 1884, + "time": 13.79 + }, + { + "epoch": 1.81, + "learning_rate": "1.8050e-04", + "loss": 0.8605, + "slid_loss": 0.8487, + "step": 1885, + "time": 13.37 + }, + { + "epoch": 1.81, + "learning_rate": "1.8047e-04", + "loss": 0.8005, + "slid_loss": 0.8489, + "step": 1886, + "time": 14.02 + }, + { + "epoch": 1.81, + "learning_rate": "1.8044e-04", + "loss": 0.8628, + "slid_loss": 0.8492, + "step": 1887, + "time": 13.93 + }, + { + "epoch": 1.81, + "learning_rate": "1.8042e-04", + "loss": 0.8337, + "slid_loss": 0.8487, + "step": 1888, + "time": 14.1 + }, + { + "epoch": 1.81, + "learning_rate": "1.8039e-04", + "loss": 0.7802, + "slid_loss": 
0.8478, + "step": 1889, + "time": 13.56 + }, + { + "epoch": 1.82, + "learning_rate": "1.8036e-04", + "loss": 0.8708, + "slid_loss": 0.8481, + "step": 1890, + "time": 12.79 + }, + { + "epoch": 1.82, + "learning_rate": "1.8034e-04", + "loss": 0.8029, + "slid_loss": 0.8482, + "step": 1891, + "time": 11.35 + }, + { + "epoch": 1.82, + "learning_rate": "1.8031e-04", + "loss": 0.8688, + "slid_loss": 0.8488, + "step": 1892, + "time": 13.55 + }, + { + "epoch": 1.82, + "learning_rate": "1.8028e-04", + "loss": 0.8033, + "slid_loss": 0.8485, + "step": 1893, + "time": 13.21 + }, + { + "epoch": 1.82, + "learning_rate": "1.8026e-04", + "loss": 0.7882, + "slid_loss": 0.8476, + "step": 1894, + "time": 13.24 + }, + { + "epoch": 1.82, + "learning_rate": "1.8023e-04", + "loss": 0.8322, + "slid_loss": 0.8471, + "step": 1895, + "time": 13.97 + }, + { + "epoch": 1.82, + "learning_rate": "1.8020e-04", + "loss": 0.7679, + "slid_loss": 0.8459, + "step": 1896, + "time": 13.53 + }, + { + "epoch": 1.82, + "learning_rate": "1.8018e-04", + "loss": 0.7792, + "slid_loss": 0.8446, + "step": 1897, + "time": 13.79 + }, + { + "epoch": 1.82, + "learning_rate": "1.8015e-04", + "loss": 0.7793, + "slid_loss": 0.8437, + "step": 1898, + "time": 13.45 + }, + { + "epoch": 1.82, + "learning_rate": "1.8012e-04", + "loss": 0.8488, + "slid_loss": 0.8431, + "step": 1899, + "time": 13.25 + }, + { + "epoch": 1.83, + "learning_rate": "1.8010e-04", + "loss": 0.9309, + "slid_loss": 0.8437, + "step": 1900, + "time": 12.85 + }, + { + "epoch": 1.83, + "learning_rate": "1.8007e-04", + "loss": 0.8012, + "slid_loss": 0.8433, + "step": 1901, + "time": 13.97 + }, + { + "epoch": 1.83, + "learning_rate": "1.8004e-04", + "loss": 0.8725, + "slid_loss": 0.8441, + "step": 1902, + "time": 14.26 + }, + { + "epoch": 1.83, + "learning_rate": "1.8002e-04", + "loss": 0.8714, + "slid_loss": 0.8456, + "step": 1903, + "time": 13.31 + }, + { + "epoch": 1.83, + "learning_rate": "1.7999e-04", + "loss": 0.8344, + "slid_loss": 0.8448, + "step": 
1904, + "time": 13.72 + }, + { + "epoch": 1.83, + "learning_rate": "1.7996e-04", + "loss": 0.7972, + "slid_loss": 0.8434, + "step": 1905, + "time": 13.79 + }, + { + "epoch": 1.83, + "learning_rate": "1.7993e-04", + "loss": 0.8698, + "slid_loss": 0.8441, + "step": 1906, + "time": 13.69 + }, + { + "epoch": 1.83, + "learning_rate": "1.7991e-04", + "loss": 0.8639, + "slid_loss": 0.8445, + "step": 1907, + "time": 13.37 + }, + { + "epoch": 1.83, + "learning_rate": "1.7988e-04", + "loss": 0.8293, + "slid_loss": 0.8436, + "step": 1908, + "time": 10.93 + }, + { + "epoch": 1.83, + "learning_rate": "1.7985e-04", + "loss": 0.8096, + "slid_loss": 0.8427, + "step": 1909, + "time": 14.03 + }, + { + "epoch": 1.83, + "learning_rate": "1.7983e-04", + "loss": 0.798, + "slid_loss": 0.8431, + "step": 1910, + "time": 12.91 + }, + { + "epoch": 1.84, + "learning_rate": "1.7980e-04", + "loss": 0.8463, + "slid_loss": 0.8427, + "step": 1911, + "time": 12.9 + }, + { + "epoch": 1.84, + "learning_rate": "1.7977e-04", + "loss": 0.7989, + "slid_loss": 0.842, + "step": 1912, + "time": 13.76 + }, + { + "epoch": 1.84, + "learning_rate": "1.7975e-04", + "loss": 0.8295, + "slid_loss": 0.8419, + "step": 1913, + "time": 14.01 + }, + { + "epoch": 1.84, + "learning_rate": "1.7972e-04", + "loss": 0.7834, + "slid_loss": 0.841, + "step": 1914, + "time": 13.66 + }, + { + "epoch": 1.84, + "learning_rate": "1.7969e-04", + "loss": 0.7744, + "slid_loss": 0.8404, + "step": 1915, + "time": 12.48 + }, + { + "epoch": 1.84, + "learning_rate": "1.7967e-04", + "loss": 0.9124, + "slid_loss": 0.841, + "step": 1916, + "time": 12.98 + }, + { + "epoch": 1.84, + "learning_rate": "1.7964e-04", + "loss": 0.8509, + "slid_loss": 0.8406, + "step": 1917, + "time": 12.44 + }, + { + "epoch": 1.84, + "learning_rate": "1.7961e-04", + "loss": 0.8908, + "slid_loss": 0.8408, + "step": 1918, + "time": 13.23 + }, + { + "epoch": 1.84, + "learning_rate": "1.7958e-04", + "loss": 0.8188, + "slid_loss": 0.8408, + "step": 1919, + "time": 13.56 + 
}, + { + "epoch": 1.84, + "learning_rate": "1.7956e-04", + "loss": 0.8608, + "slid_loss": 0.8401, + "step": 1920, + "time": 11.38 + }, + { + "epoch": 1.85, + "learning_rate": "1.7953e-04", + "loss": 0.8698, + "slid_loss": 0.841, + "step": 1921, + "time": 11.74 + }, + { + "epoch": 1.85, + "learning_rate": "1.7950e-04", + "loss": 0.8261, + "slid_loss": 0.8411, + "step": 1922, + "time": 13.43 + }, + { + "epoch": 1.85, + "learning_rate": "1.7948e-04", + "loss": 0.9124, + "slid_loss": 0.8415, + "step": 1923, + "time": 13.34 + }, + { + "epoch": 1.85, + "learning_rate": "1.7945e-04", + "loss": 0.7571, + "slid_loss": 0.8419, + "step": 1924, + "time": 13.98 + }, + { + "epoch": 1.85, + "learning_rate": "1.7942e-04", + "loss": 0.7411, + "slid_loss": 0.8411, + "step": 1925, + "time": 13.39 + }, + { + "epoch": 1.85, + "learning_rate": "1.7939e-04", + "loss": 0.8096, + "slid_loss": 0.8404, + "step": 1926, + "time": 11.23 + }, + { + "epoch": 1.85, + "learning_rate": "1.7937e-04", + "loss": 0.9292, + "slid_loss": 0.8419, + "step": 1927, + "time": 13.38 + }, + { + "epoch": 1.85, + "learning_rate": "1.7934e-04", + "loss": 0.86, + "slid_loss": 0.8423, + "step": 1928, + "time": 13.55 + }, + { + "epoch": 1.85, + "learning_rate": "1.7931e-04", + "loss": 0.8202, + "slid_loss": 0.8419, + "step": 1929, + "time": 11.31 + }, + { + "epoch": 1.85, + "learning_rate": "1.7929e-04", + "loss": 0.8622, + "slid_loss": 0.8417, + "step": 1930, + "time": 13.39 + }, + { + "epoch": 1.85, + "learning_rate": "1.7926e-04", + "loss": 0.888, + "slid_loss": 0.8425, + "step": 1931, + "time": 12.26 + }, + { + "epoch": 1.86, + "learning_rate": "1.7923e-04", + "loss": 0.8174, + "slid_loss": 0.8423, + "step": 1932, + "time": 11.9 + }, + { + "epoch": 1.86, + "learning_rate": "1.7920e-04", + "loss": 0.8287, + "slid_loss": 0.8428, + "step": 1933, + "time": 13.22 + }, + { + "epoch": 1.86, + "learning_rate": "1.7918e-04", + "loss": 0.8023, + "slid_loss": 0.8424, + "step": 1934, + "time": 11.61 + }, + { + "epoch": 1.86, 
+ "learning_rate": "1.7915e-04", + "loss": 0.8942, + "slid_loss": 0.8422, + "step": 1935, + "time": 12.77 + }, + { + "epoch": 1.86, + "learning_rate": "1.7912e-04", + "loss": 0.8124, + "slid_loss": 0.8407, + "step": 1936, + "time": 13.92 + }, + { + "epoch": 1.86, + "learning_rate": "1.7910e-04", + "loss": 0.9002, + "slid_loss": 0.8407, + "step": 1937, + "time": 13.84 + }, + { + "epoch": 1.86, + "learning_rate": "1.7907e-04", + "loss": 0.8668, + "slid_loss": 0.8382, + "step": 1938, + "time": 13.47 + }, + { + "epoch": 1.86, + "learning_rate": "1.7904e-04", + "loss": 0.8447, + "slid_loss": 0.838, + "step": 1939, + "time": 13.02 + }, + { + "epoch": 1.86, + "learning_rate": "1.7901e-04", + "loss": 0.7282, + "slid_loss": 0.837, + "step": 1940, + "time": 12.18 + }, + { + "epoch": 1.86, + "learning_rate": "1.7899e-04", + "loss": 0.852, + "slid_loss": 0.8372, + "step": 1941, + "time": 14.17 + }, + { + "epoch": 1.87, + "learning_rate": "1.7896e-04", + "loss": 0.8132, + "slid_loss": 0.8363, + "step": 1942, + "time": 13.73 + }, + { + "epoch": 1.87, + "learning_rate": "1.7893e-04", + "loss": 0.775, + "slid_loss": 0.8368, + "step": 1943, + "time": 13.36 + }, + { + "epoch": 1.87, + "learning_rate": "1.7890e-04", + "loss": 0.8363, + "slid_loss": 0.8372, + "step": 1944, + "time": 13.6 + }, + { + "epoch": 1.87, + "learning_rate": "1.7888e-04", + "loss": 0.8023, + "slid_loss": 0.837, + "step": 1945, + "time": 13.38 + }, + { + "epoch": 1.87, + "learning_rate": "1.7885e-04", + "loss": 0.8265, + "slid_loss": 0.837, + "step": 1946, + "time": 13.34 + }, + { + "epoch": 1.87, + "learning_rate": "1.7882e-04", + "loss": 0.8685, + "slid_loss": 0.8376, + "step": 1947, + "time": 13.74 + }, + { + "epoch": 1.87, + "learning_rate": "1.7879e-04", + "loss": 0.8309, + "slid_loss": 0.838, + "step": 1948, + "time": 13.12 + }, + { + "epoch": 1.87, + "learning_rate": "1.7877e-04", + "loss": 0.8025, + "slid_loss": 0.8375, + "step": 1949, + "time": 13.72 + }, + { + "epoch": 1.87, + "learning_rate": 
"1.7874e-04", + "loss": 0.8972, + "slid_loss": 0.8381, + "step": 1950, + "time": 13.99 + }, + { + "epoch": 1.87, + "learning_rate": "1.7871e-04", + "loss": 0.8778, + "slid_loss": 0.8384, + "step": 1951, + "time": 13.47 + }, + { + "epoch": 1.88, + "learning_rate": "1.7868e-04", + "loss": 0.7979, + "slid_loss": 0.8376, + "step": 1952, + "time": 13.34 + }, + { + "epoch": 1.88, + "learning_rate": "1.7866e-04", + "loss": 0.8233, + "slid_loss": 0.8364, + "step": 1953, + "time": 14.73 + }, + { + "epoch": 1.88, + "learning_rate": "1.7863e-04", + "loss": 0.83, + "slid_loss": 0.8371, + "step": 1954, + "time": 13.48 + }, + { + "epoch": 1.88, + "learning_rate": "1.7860e-04", + "loss": 0.7811, + "slid_loss": 0.8363, + "step": 1955, + "time": 14.49 + }, + { + "epoch": 1.88, + "learning_rate": "1.7857e-04", + "loss": 0.7307, + "slid_loss": 0.8352, + "step": 1956, + "time": 13.16 + }, + { + "epoch": 1.88, + "learning_rate": "1.7855e-04", + "loss": 0.7403, + "slid_loss": 0.8354, + "step": 1957, + "time": 14.08 + }, + { + "epoch": 1.88, + "learning_rate": "1.7852e-04", + "loss": 0.7493, + "slid_loss": 0.8348, + "step": 1958, + "time": 12.25 + }, + { + "epoch": 1.88, + "learning_rate": "1.7849e-04", + "loss": 0.7569, + "slid_loss": 0.834, + "step": 1959, + "time": 13.36 + }, + { + "epoch": 1.88, + "learning_rate": "1.7846e-04", + "loss": 0.867, + "slid_loss": 0.8352, + "step": 1960, + "time": 13.45 + }, + { + "epoch": 1.88, + "learning_rate": "1.7844e-04", + "loss": 0.7446, + "slid_loss": 0.8337, + "step": 1961, + "time": 13.24 + }, + { + "epoch": 1.88, + "learning_rate": "1.7841e-04", + "loss": 0.911, + "slid_loss": 0.8344, + "step": 1962, + "time": 13.4 + }, + { + "epoch": 1.89, + "learning_rate": "1.7838e-04", + "loss": 0.8159, + "slid_loss": 0.8347, + "step": 1963, + "time": 12.22 + }, + { + "epoch": 1.89, + "learning_rate": "1.7835e-04", + "loss": 0.7831, + "slid_loss": 0.8339, + "step": 1964, + "time": 13.83 + }, + { + "epoch": 1.89, + "learning_rate": "1.7833e-04", + "loss": 
0.7058, + "slid_loss": 0.8321, + "step": 1965, + "time": 13.42 + }, + { + "epoch": 1.89, + "learning_rate": "1.7830e-04", + "loss": 0.8786, + "slid_loss": 0.8324, + "step": 1966, + "time": 13.61 + }, + { + "epoch": 1.89, + "learning_rate": "1.7827e-04", + "loss": 0.8752, + "slid_loss": 0.8328, + "step": 1967, + "time": 13.48 + }, + { + "epoch": 1.89, + "learning_rate": "1.7824e-04", + "loss": 0.7464, + "slid_loss": 0.832, + "step": 1968, + "time": 13.89 + }, + { + "epoch": 1.89, + "learning_rate": "1.7822e-04", + "loss": 0.8721, + "slid_loss": 0.8325, + "step": 1969, + "time": 10.96 + }, + { + "epoch": 1.89, + "learning_rate": "1.7819e-04", + "loss": 0.853, + "slid_loss": 0.8325, + "step": 1970, + "time": 13.58 + }, + { + "epoch": 1.89, + "learning_rate": "1.7816e-04", + "loss": 0.8372, + "slid_loss": 0.832, + "step": 1971, + "time": 12.96 + }, + { + "epoch": 1.89, + "learning_rate": "1.7813e-04", + "loss": 0.7642, + "slid_loss": 0.8302, + "step": 1972, + "time": 12.83 + }, + { + "epoch": 1.9, + "learning_rate": "1.7811e-04", + "loss": 0.8733, + "slid_loss": 0.8301, + "step": 1973, + "time": 11.5 + }, + { + "epoch": 1.9, + "learning_rate": "1.7808e-04", + "loss": 0.8365, + "slid_loss": 0.8308, + "step": 1974, + "time": 12.67 + }, + { + "epoch": 1.9, + "learning_rate": "1.7805e-04", + "loss": 0.7706, + "slid_loss": 0.8305, + "step": 1975, + "time": 12.78 + }, + { + "epoch": 1.9, + "learning_rate": "1.7802e-04", + "loss": 0.7279, + "slid_loss": 0.8296, + "step": 1976, + "time": 13.47 + }, + { + "epoch": 1.9, + "learning_rate": "1.7799e-04", + "loss": 0.8961, + "slid_loss": 0.83, + "step": 1977, + "time": 13.23 + }, + { + "epoch": 1.9, + "learning_rate": "1.7797e-04", + "loss": 0.7487, + "slid_loss": 0.829, + "step": 1978, + "time": 13.23 + }, + { + "epoch": 1.9, + "learning_rate": "1.7794e-04", + "loss": 0.7473, + "slid_loss": 0.8281, + "step": 1979, + "time": 11.32 + }, + { + "epoch": 1.9, + "learning_rate": "1.7791e-04", + "loss": 0.7728, + "slid_loss": 0.8274, + 
"step": 1980, + "time": 13.21 + }, + { + "epoch": 1.9, + "learning_rate": "1.7788e-04", + "loss": 0.869, + "slid_loss": 0.8266, + "step": 1981, + "time": 13.02 + }, + { + "epoch": 1.9, + "learning_rate": "1.7786e-04", + "loss": 0.8564, + "slid_loss": 0.8266, + "step": 1982, + "time": 10.66 + }, + { + "epoch": 1.9, + "learning_rate": "1.7783e-04", + "loss": 0.8421, + "slid_loss": 0.8259, + "step": 1983, + "time": 11.07 + }, + { + "epoch": 1.91, + "learning_rate": "1.7780e-04", + "loss": 0.837, + "slid_loss": 0.8252, + "step": 1984, + "time": 12.22 + }, + { + "epoch": 1.91, + "learning_rate": "1.7777e-04", + "loss": 0.8567, + "slid_loss": 0.8251, + "step": 1985, + "time": 12.95 + }, + { + "epoch": 1.91, + "learning_rate": "1.7774e-04", + "loss": 0.9824, + "slid_loss": 0.827, + "step": 1986, + "time": 11.88 + }, + { + "epoch": 1.91, + "learning_rate": "1.7772e-04", + "loss": 0.7986, + "slid_loss": 0.8263, + "step": 1987, + "time": 13.42 + }, + { + "epoch": 1.91, + "learning_rate": "1.7769e-04", + "loss": 0.8039, + "slid_loss": 0.826, + "step": 1988, + "time": 13.03 + }, + { + "epoch": 1.91, + "learning_rate": "1.7766e-04", + "loss": 0.7561, + "slid_loss": 0.8258, + "step": 1989, + "time": 13.86 + }, + { + "epoch": 1.91, + "learning_rate": "1.7763e-04", + "loss": 0.8473, + "slid_loss": 0.8255, + "step": 1990, + "time": 13.38 + }, + { + "epoch": 1.91, + "learning_rate": "1.7760e-04", + "loss": 0.8765, + "slid_loss": 0.8263, + "step": 1991, + "time": 12.99 + }, + { + "epoch": 1.91, + "learning_rate": "1.7758e-04", + "loss": 0.8406, + "slid_loss": 0.826, + "step": 1992, + "time": 13.49 + }, + { + "epoch": 1.91, + "learning_rate": "1.7755e-04", + "loss": 0.7393, + "slid_loss": 0.8254, + "step": 1993, + "time": 11.29 + }, + { + "epoch": 1.92, + "learning_rate": "1.7752e-04", + "loss": 0.8802, + "slid_loss": 0.8263, + "step": 1994, + "time": 12.11 + }, + { + "epoch": 1.92, + "learning_rate": "1.7749e-04", + "loss": 0.7405, + "slid_loss": 0.8254, + "step": 1995, + "time": 
13.03 + }, + { + "epoch": 1.92, + "learning_rate": "1.7746e-04", + "loss": 0.8566, + "slid_loss": 0.8262, + "step": 1996, + "time": 12.99 + }, + { + "epoch": 1.92, + "learning_rate": "1.7744e-04", + "loss": 0.7462, + "slid_loss": 0.8259, + "step": 1997, + "time": 13.98 + }, + { + "epoch": 1.92, + "learning_rate": "1.7741e-04", + "loss": 0.9288, + "slid_loss": 0.8274, + "step": 1998, + "time": 13.66 + }, + { + "epoch": 1.92, + "learning_rate": "1.7738e-04", + "loss": 0.8569, + "slid_loss": 0.8275, + "step": 1999, + "time": 12.8 + }, + { + "epoch": 1.92, + "learning_rate": "1.7735e-04", + "loss": 0.8382, + "slid_loss": 0.8266, + "step": 2000, + "time": 14.08 + }, + { + "epoch": 1.92, + "learning_rate": "1.7732e-04", + "loss": 0.7676, + "slid_loss": 0.8262, + "step": 2001, + "time": 13.24 + }, + { + "epoch": 1.92, + "learning_rate": "1.7730e-04", + "loss": 0.7996, + "slid_loss": 0.8255, + "step": 2002, + "time": 13.6 + }, + { + "epoch": 1.92, + "learning_rate": "1.7727e-04", + "loss": 0.8044, + "slid_loss": 0.8248, + "step": 2003, + "time": 13.64 + }, + { + "epoch": 1.93, + "learning_rate": "1.7724e-04", + "loss": 0.7631, + "slid_loss": 0.8241, + "step": 2004, + "time": 13.66 + }, + { + "epoch": 1.93, + "learning_rate": "1.7721e-04", + "loss": 0.7932, + "slid_loss": 0.8241, + "step": 2005, + "time": 14.31 + }, + { + "epoch": 1.93, + "learning_rate": "1.7718e-04", + "loss": 0.711, + "slid_loss": 0.8225, + "step": 2006, + "time": 13.36 + }, + { + "epoch": 1.93, + "learning_rate": "1.7716e-04", + "loss": 0.8624, + "slid_loss": 0.8225, + "step": 2007, + "time": 12.94 + }, + { + "epoch": 1.93, + "learning_rate": "1.7713e-04", + "loss": 0.8848, + "slid_loss": 0.823, + "step": 2008, + "time": 11.86 + }, + { + "epoch": 1.93, + "learning_rate": "1.7710e-04", + "loss": 0.8698, + "slid_loss": 0.8236, + "step": 2009, + "time": 14.06 + }, + { + "epoch": 1.93, + "learning_rate": "1.7707e-04", + "loss": 0.8482, + "slid_loss": 0.8241, + "step": 2010, + "time": 11.88 + }, + { + 
"epoch": 1.93, + "learning_rate": "1.7704e-04", + "loss": 0.903, + "slid_loss": 0.8247, + "step": 2011, + "time": 14.61 + }, + { + "epoch": 1.93, + "learning_rate": "1.7701e-04", + "loss": 0.7087, + "slid_loss": 0.8238, + "step": 2012, + "time": 13.84 + }, + { + "epoch": 1.93, + "learning_rate": "1.7699e-04", + "loss": 0.8457, + "slid_loss": 0.824, + "step": 2013, + "time": 13.25 + }, + { + "epoch": 1.93, + "learning_rate": "1.7696e-04", + "loss": 0.8809, + "slid_loss": 0.8249, + "step": 2014, + "time": 12.86 + }, + { + "epoch": 1.94, + "learning_rate": "1.7693e-04", + "loss": 0.8237, + "slid_loss": 0.8254, + "step": 2015, + "time": 11.07 + }, + { + "epoch": 1.94, + "learning_rate": "1.7690e-04", + "loss": 0.7807, + "slid_loss": 0.8241, + "step": 2016, + "time": 11.92 + }, + { + "epoch": 1.94, + "learning_rate": "1.7687e-04", + "loss": 0.7545, + "slid_loss": 0.8231, + "step": 2017, + "time": 13.62 + }, + { + "epoch": 1.94, + "learning_rate": "1.7684e-04", + "loss": 0.8768, + "slid_loss": 0.823, + "step": 2018, + "time": 14.39 + }, + { + "epoch": 1.94, + "learning_rate": "1.7682e-04", + "loss": 0.7515, + "slid_loss": 0.8223, + "step": 2019, + "time": 13.4 + }, + { + "epoch": 1.94, + "learning_rate": "1.7679e-04", + "loss": 0.7827, + "slid_loss": 0.8216, + "step": 2020, + "time": 13.46 + }, + { + "epoch": 1.94, + "learning_rate": "1.7676e-04", + "loss": 0.7893, + "slid_loss": 0.8207, + "step": 2021, + "time": 13.35 + }, + { + "epoch": 1.94, + "learning_rate": "1.7673e-04", + "loss": 0.8345, + "slid_loss": 0.8208, + "step": 2022, + "time": 11.4 + }, + { + "epoch": 1.94, + "learning_rate": "1.7670e-04", + "loss": 0.7537, + "slid_loss": 0.8192, + "step": 2023, + "time": 11.66 + }, + { + "epoch": 1.94, + "learning_rate": "1.7667e-04", + "loss": 0.8879, + "slid_loss": 0.8206, + "step": 2024, + "time": 13.75 + }, + { + "epoch": 1.95, + "learning_rate": "1.7665e-04", + "loss": 0.7623, + "slid_loss": 0.8208, + "step": 2025, + "time": 13.84 + }, + { + "epoch": 1.95, + 
"learning_rate": "1.7662e-04", + "loss": 0.8423, + "slid_loss": 0.8211, + "step": 2026, + "time": 12.14 + }, + { + "epoch": 1.95, + "learning_rate": "1.7659e-04", + "loss": 0.789, + "slid_loss": 0.8197, + "step": 2027, + "time": 13.55 + }, + { + "epoch": 1.95, + "learning_rate": "1.7656e-04", + "loss": 0.7936, + "slid_loss": 0.819, + "step": 2028, + "time": 13.21 + }, + { + "epoch": 1.95, + "learning_rate": "1.7653e-04", + "loss": 0.8515, + "slid_loss": 0.8193, + "step": 2029, + "time": 11.79 + }, + { + "epoch": 1.95, + "learning_rate": "1.7650e-04", + "loss": 0.7697, + "slid_loss": 0.8184, + "step": 2030, + "time": 13.22 + }, + { + "epoch": 1.95, + "learning_rate": "1.7648e-04", + "loss": 0.8146, + "slid_loss": 0.8177, + "step": 2031, + "time": 13.21 + }, + { + "epoch": 1.95, + "learning_rate": "1.7645e-04", + "loss": 0.839, + "slid_loss": 0.8179, + "step": 2032, + "time": 11.87 + }, + { + "epoch": 1.95, + "learning_rate": "1.7642e-04", + "loss": 0.8288, + "slid_loss": 0.8179, + "step": 2033, + "time": 13.41 + }, + { + "epoch": 1.95, + "learning_rate": "1.7639e-04", + "loss": 0.7764, + "slid_loss": 0.8176, + "step": 2034, + "time": 13.38 + }, + { + "epoch": 1.95, + "learning_rate": "1.7636e-04", + "loss": 0.7965, + "slid_loss": 0.8167, + "step": 2035, + "time": 11.92 + }, + { + "epoch": 1.96, + "learning_rate": "1.7633e-04", + "loss": 0.8415, + "slid_loss": 0.817, + "step": 2036, + "time": 12.21 + }, + { + "epoch": 1.96, + "learning_rate": "1.7630e-04", + "loss": 0.8479, + "slid_loss": 0.8164, + "step": 2037, + "time": 12.92 + }, + { + "epoch": 1.96, + "learning_rate": "1.7628e-04", + "loss": 0.9035, + "slid_loss": 0.8168, + "step": 2038, + "time": 10.7 + }, + { + "epoch": 1.96, + "learning_rate": "1.7625e-04", + "loss": 0.8547, + "slid_loss": 0.8169, + "step": 2039, + "time": 13.67 + }, + { + "epoch": 1.96, + "learning_rate": "1.7622e-04", + "loss": 0.8842, + "slid_loss": 0.8185, + "step": 2040, + "time": 13.0 + }, + { + "epoch": 1.96, + "learning_rate": 
"1.7619e-04", + "loss": 0.7438, + "slid_loss": 0.8174, + "step": 2041, + "time": 13.47 + }, + { + "epoch": 1.96, + "learning_rate": "1.7616e-04", + "loss": 0.7569, + "slid_loss": 0.8168, + "step": 2042, + "time": 11.99 + }, + { + "epoch": 1.96, + "learning_rate": "1.7613e-04", + "loss": 0.7766, + "slid_loss": 0.8168, + "step": 2043, + "time": 12.97 + }, + { + "epoch": 1.96, + "learning_rate": "1.7610e-04", + "loss": 0.8183, + "slid_loss": 0.8166, + "step": 2044, + "time": 12.8 + }, + { + "epoch": 1.96, + "learning_rate": "1.7608e-04", + "loss": 0.7463, + "slid_loss": 0.8161, + "step": 2045, + "time": 13.01 + }, + { + "epoch": 1.97, + "learning_rate": "1.7605e-04", + "loss": 0.7882, + "slid_loss": 0.8157, + "step": 2046, + "time": 13.68 + }, + { + "epoch": 1.97, + "learning_rate": "1.7602e-04", + "loss": 0.8785, + "slid_loss": 0.8158, + "step": 2047, + "time": 13.41 + }, + { + "epoch": 1.97, + "learning_rate": "1.7599e-04", + "loss": 0.8151, + "slid_loss": 0.8156, + "step": 2048, + "time": 14.29 + }, + { + "epoch": 1.97, + "learning_rate": "1.7596e-04", + "loss": 0.8491, + "slid_loss": 0.8161, + "step": 2049, + "time": 12.9 + }, + { + "epoch": 1.97, + "learning_rate": "1.7593e-04", + "loss": 0.8557, + "slid_loss": 0.8157, + "step": 2050, + "time": 12.99 + }, + { + "epoch": 1.97, + "learning_rate": "1.7590e-04", + "loss": 0.7562, + "slid_loss": 0.8145, + "step": 2051, + "time": 11.93 + }, + { + "epoch": 1.97, + "learning_rate": "1.7588e-04", + "loss": 0.9049, + "slid_loss": 0.8155, + "step": 2052, + "time": 13.53 + }, + { + "epoch": 1.97, + "learning_rate": "1.7585e-04", + "loss": 0.8791, + "slid_loss": 0.8161, + "step": 2053, + "time": 13.92 + }, + { + "epoch": 1.97, + "learning_rate": "1.7582e-04", + "loss": 0.8745, + "slid_loss": 0.8166, + "step": 2054, + "time": 12.88 + }, + { + "epoch": 1.97, + "learning_rate": "1.7579e-04", + "loss": 0.7358, + "slid_loss": 0.8161, + "step": 2055, + "time": 13.16 + }, + { + "epoch": 1.98, + "learning_rate": "1.7576e-04", + 
"loss": 0.8981, + "slid_loss": 0.8178, + "step": 2056, + "time": 13.04 + }, + { + "epoch": 1.98, + "learning_rate": "1.7573e-04", + "loss": 0.8748, + "slid_loss": 0.8191, + "step": 2057, + "time": 12.8 + }, + { + "epoch": 1.98, + "learning_rate": "1.7570e-04", + "loss": 0.7838, + "slid_loss": 0.8195, + "step": 2058, + "time": 13.58 + }, + { + "epoch": 1.98, + "learning_rate": "1.7567e-04", + "loss": 0.8423, + "slid_loss": 0.8203, + "step": 2059, + "time": 13.1 + }, + { + "epoch": 1.98, + "learning_rate": "1.7565e-04", + "loss": 0.8232, + "slid_loss": 0.8199, + "step": 2060, + "time": 12.07 + }, + { + "epoch": 1.98, + "learning_rate": "1.7562e-04", + "loss": 0.8262, + "slid_loss": 0.8207, + "step": 2061, + "time": 11.6 + }, + { + "epoch": 1.98, + "learning_rate": "1.7559e-04", + "loss": 0.8769, + "slid_loss": 0.8204, + "step": 2062, + "time": 11.8 + }, + { + "epoch": 1.98, + "learning_rate": "1.7556e-04", + "loss": 0.7845, + "slid_loss": 0.82, + "step": 2063, + "time": 13.83 + }, + { + "epoch": 1.98, + "learning_rate": "1.7553e-04", + "loss": 0.8354, + "slid_loss": 0.8206, + "step": 2064, + "time": 14.01 + }, + { + "epoch": 1.98, + "learning_rate": "1.7550e-04", + "loss": 0.8288, + "slid_loss": 0.8218, + "step": 2065, + "time": 13.9 + }, + { + "epoch": 1.98, + "learning_rate": "1.7547e-04", + "loss": 0.8627, + "slid_loss": 0.8216, + "step": 2066, + "time": 13.26 + }, + { + "epoch": 1.99, + "learning_rate": "1.7544e-04", + "loss": 0.8114, + "slid_loss": 0.821, + "step": 2067, + "time": 13.79 + }, + { + "epoch": 1.99, + "learning_rate": "1.7542e-04", + "loss": 0.8754, + "slid_loss": 0.8223, + "step": 2068, + "time": 13.98 + }, + { + "epoch": 1.99, + "learning_rate": "1.7539e-04", + "loss": 0.8515, + "slid_loss": 0.8221, + "step": 2069, + "time": 13.54 + }, + { + "epoch": 1.99, + "learning_rate": "1.7536e-04", + "loss": 0.7751, + "slid_loss": 0.8213, + "step": 2070, + "time": 12.83 + }, + { + "epoch": 1.99, + "learning_rate": "1.7533e-04", + "loss": 0.842, + 
"slid_loss": 0.8213, + "step": 2071, + "time": 13.3 + }, + { + "epoch": 1.99, + "learning_rate": "1.7530e-04", + "loss": 0.9551, + "slid_loss": 0.8233, + "step": 2072, + "time": 13.66 + }, + { + "epoch": 1.99, + "learning_rate": "1.7527e-04", + "loss": 0.9395, + "slid_loss": 0.8239, + "step": 2073, + "time": 14.04 + }, + { + "epoch": 1.99, + "learning_rate": "1.7524e-04", + "loss": 0.8937, + "slid_loss": 0.8245, + "step": 2074, + "time": 13.52 + }, + { + "epoch": 1.99, + "learning_rate": "1.7521e-04", + "loss": 0.8417, + "slid_loss": 0.8252, + "step": 2075, + "time": 11.41 + }, + { + "epoch": 1.99, + "learning_rate": "1.7518e-04", + "loss": 0.7155, + "slid_loss": 0.8251, + "step": 2076, + "time": 13.76 + }, + { + "epoch": 2.0, + "learning_rate": "1.7515e-04", + "loss": 0.8252, + "slid_loss": 0.8244, + "step": 2077, + "time": 13.33 + }, + { + "epoch": 2.0, + "learning_rate": "1.7513e-04", + "loss": 0.7343, + "slid_loss": 0.8242, + "step": 2078, + "time": 13.18 + }, + { + "epoch": 2.0, + "learning_rate": "1.7510e-04", + "loss": 0.91, + "slid_loss": 0.8259, + "step": 2079, + "time": 12.83 + }, + { + "epoch": 2.0, + "learning_rate": "1.7507e-04", + "loss": 0.7312, + "slid_loss": 0.8254, + "step": 2080, + "time": 11.55 + }, + { + "epoch": 2.0, + "learning_rate": "1.7504e-04", + "loss": 0.8165, + "slid_loss": 0.8249, + "step": 2081, + "time": 13.89 + }, + { + "epoch": 2.0, + "learning_rate": "1.7501e-04", + "loss": 0.8129, + "slid_loss": 0.8245, + "step": 2082, + "time": 13.39 + }, + { + "epoch": 2.0, + "learning_rate": "1.7498e-04", + "loss": 0.8596, + "slid_loss": 0.8247, + "step": 2083, + "time": 173.62 + }, + { + "epoch": 2.0, + "learning_rate": "1.7495e-04", + "loss": 0.8486, + "slid_loss": 0.8248, + "step": 2084, + "time": 13.4 + }, + { + "epoch": 2.0, + "learning_rate": "1.7492e-04", + "loss": 0.8069, + "slid_loss": 0.8243, + "step": 2085, + "time": 13.29 + }, + { + "epoch": 2.0, + "learning_rate": "1.7489e-04", + "loss": 0.8845, + "slid_loss": 0.8233, + "step": 
2086, + "time": 13.22 + }, + { + "epoch": 2.0, + "learning_rate": "1.7486e-04", + "loss": 0.8534, + "slid_loss": 0.8238, + "step": 2087, + "time": 12.77 + }, + { + "epoch": 2.01, + "learning_rate": "1.7484e-04", + "loss": 0.8601, + "slid_loss": 0.8244, + "step": 2088, + "time": 13.39 + }, + { + "epoch": 2.01, + "learning_rate": "1.7481e-04", + "loss": 0.8688, + "slid_loss": 0.8255, + "step": 2089, + "time": 13.55 + }, + { + "epoch": 2.01, + "learning_rate": "1.7478e-04", + "loss": 0.818, + "slid_loss": 0.8252, + "step": 2090, + "time": 13.83 + }, + { + "epoch": 2.01, + "learning_rate": "1.7475e-04", + "loss": 0.7879, + "slid_loss": 0.8244, + "step": 2091, + "time": 12.17 + }, + { + "epoch": 2.01, + "learning_rate": "1.7472e-04", + "loss": 0.7116, + "slid_loss": 0.8231, + "step": 2092, + "time": 13.79 + }, + { + "epoch": 2.01, + "learning_rate": "1.7469e-04", + "loss": 0.815, + "slid_loss": 0.8238, + "step": 2093, + "time": 13.66 + }, + { + "epoch": 2.01, + "learning_rate": "1.7466e-04", + "loss": 0.8384, + "slid_loss": 0.8234, + "step": 2094, + "time": 13.84 + }, + { + "epoch": 2.01, + "learning_rate": "1.7463e-04", + "loss": 0.7947, + "slid_loss": 0.8239, + "step": 2095, + "time": 13.54 + }, + { + "epoch": 2.01, + "learning_rate": "1.7460e-04", + "loss": 0.8604, + "slid_loss": 0.824, + "step": 2096, + "time": 13.56 + }, + { + "epoch": 2.01, + "learning_rate": "1.7457e-04", + "loss": 0.8914, + "slid_loss": 0.8254, + "step": 2097, + "time": 13.65 + }, + { + "epoch": 2.02, + "learning_rate": "1.7454e-04", + "loss": 0.8418, + "slid_loss": 0.8246, + "step": 2098, + "time": 12.98 + }, + { + "epoch": 2.02, + "learning_rate": "1.7451e-04", + "loss": 0.7767, + "slid_loss": 0.8238, + "step": 2099, + "time": 13.37 + }, + { + "epoch": 2.02, + "learning_rate": "1.7449e-04", + "loss": 0.6768, + "slid_loss": 0.8221, + "step": 2100, + "time": 13.65 + }, + { + "epoch": 2.02, + "learning_rate": "1.7446e-04", + "loss": 0.8864, + "slid_loss": 0.8233, + "step": 2101, + "time": 12.18 + 
}, + { + "epoch": 2.02, + "learning_rate": "1.7443e-04", + "loss": 0.7445, + "slid_loss": 0.8228, + "step": 2102, + "time": 14.28 + }, + { + "epoch": 2.02, + "learning_rate": "1.7440e-04", + "loss": 0.7836, + "slid_loss": 0.8226, + "step": 2103, + "time": 13.22 + }, + { + "epoch": 2.02, + "learning_rate": "1.7437e-04", + "loss": 0.8544, + "slid_loss": 0.8235, + "step": 2104, + "time": 11.44 + }, + { + "epoch": 2.02, + "learning_rate": "1.7434e-04", + "loss": 0.9254, + "slid_loss": 0.8248, + "step": 2105, + "time": 13.91 + }, + { + "epoch": 2.02, + "learning_rate": "1.7431e-04", + "loss": 0.794, + "slid_loss": 0.8256, + "step": 2106, + "time": 13.23 + }, + { + "epoch": 2.02, + "learning_rate": "1.7428e-04", + "loss": 0.7953, + "slid_loss": 0.825, + "step": 2107, + "time": 11.92 + }, + { + "epoch": 2.02, + "learning_rate": "1.7425e-04", + "loss": 0.8632, + "slid_loss": 0.8248, + "step": 2108, + "time": 13.97 + }, + { + "epoch": 2.03, + "learning_rate": "1.7422e-04", + "loss": 0.8479, + "slid_loss": 0.8245, + "step": 2109, + "time": 13.02 + }, + { + "epoch": 2.03, + "learning_rate": "1.7419e-04", + "loss": 0.8253, + "slid_loss": 0.8243, + "step": 2110, + "time": 13.35 + }, + { + "epoch": 2.03, + "learning_rate": "1.7416e-04", + "loss": 0.7581, + "slid_loss": 0.8229, + "step": 2111, + "time": 11.5 + }, + { + "epoch": 2.03, + "learning_rate": "1.7413e-04", + "loss": 0.8236, + "slid_loss": 0.824, + "step": 2112, + "time": 12.86 + }, + { + "epoch": 2.03, + "learning_rate": "1.7410e-04", + "loss": 0.7366, + "slid_loss": 0.8229, + "step": 2113, + "time": 12.91 + }, + { + "epoch": 2.03, + "learning_rate": "1.7407e-04", + "loss": 0.8839, + "slid_loss": 0.8229, + "step": 2114, + "time": 13.29 + }, + { + "epoch": 2.03, + "learning_rate": "1.7405e-04", + "loss": 0.8301, + "slid_loss": 0.823, + "step": 2115, + "time": 11.45 + }, + { + "epoch": 2.03, + "learning_rate": "1.7402e-04", + "loss": 0.8091, + "slid_loss": 0.8233, + "step": 2116, + "time": 13.31 + }, + { + "epoch": 2.03, 
+ "learning_rate": "1.7399e-04", + "loss": 0.8629, + "slid_loss": 0.8244, + "step": 2117, + "time": 12.72 + }, + { + "epoch": 2.03, + "learning_rate": "1.7396e-04", + "loss": 0.7446, + "slid_loss": 0.8231, + "step": 2118, + "time": 14.24 + }, + { + "epoch": 2.04, + "learning_rate": "1.7393e-04", + "loss": 0.7852, + "slid_loss": 0.8234, + "step": 2119, + "time": 11.66 + }, + { + "epoch": 2.04, + "learning_rate": "1.7390e-04", + "loss": 0.7484, + "slid_loss": 0.823, + "step": 2120, + "time": 13.13 + }, + { + "epoch": 2.04, + "learning_rate": "1.7387e-04", + "loss": 0.7833, + "slid_loss": 0.823, + "step": 2121, + "time": 13.36 + }, + { + "epoch": 2.04, + "learning_rate": "1.7384e-04", + "loss": 0.8712, + "slid_loss": 0.8234, + "step": 2122, + "time": 13.77 + }, + { + "epoch": 2.04, + "learning_rate": "1.7381e-04", + "loss": 0.8234, + "slid_loss": 0.8241, + "step": 2123, + "time": 13.09 + }, + { + "epoch": 2.04, + "learning_rate": "1.7378e-04", + "loss": 0.9055, + "slid_loss": 0.8242, + "step": 2124, + "time": 12.77 + }, + { + "epoch": 2.04, + "learning_rate": "1.7375e-04", + "loss": 0.7091, + "slid_loss": 0.8237, + "step": 2125, + "time": 13.55 + }, + { + "epoch": 2.04, + "learning_rate": "1.7372e-04", + "loss": 0.7065, + "slid_loss": 0.8223, + "step": 2126, + "time": 13.16 + }, + { + "epoch": 2.04, + "learning_rate": "1.7369e-04", + "loss": 0.8058, + "slid_loss": 0.8225, + "step": 2127, + "time": 12.64 + }, + { + "epoch": 2.04, + "learning_rate": "1.7366e-04", + "loss": 0.7702, + "slid_loss": 0.8223, + "step": 2128, + "time": 12.85 + }, + { + "epoch": 2.05, + "learning_rate": "1.7363e-04", + "loss": 0.7705, + "slid_loss": 0.8215, + "step": 2129, + "time": 13.27 + }, + { + "epoch": 2.05, + "learning_rate": "1.7360e-04", + "loss": 0.7022, + "slid_loss": 0.8208, + "step": 2130, + "time": 11.27 + }, + { + "epoch": 2.05, + "learning_rate": "1.7357e-04", + "loss": 0.8055, + "slid_loss": 0.8207, + "step": 2131, + "time": 13.4 + }, + { + "epoch": 2.05, + "learning_rate": 
"1.7354e-04", + "loss": 0.8472, + "slid_loss": 0.8208, + "step": 2132, + "time": 12.34 + }, + { + "epoch": 2.05, + "learning_rate": "1.7351e-04", + "loss": 0.8472, + "slid_loss": 0.821, + "step": 2133, + "time": 13.45 + }, + { + "epoch": 2.05, + "learning_rate": "1.7348e-04", + "loss": 0.8167, + "slid_loss": 0.8214, + "step": 2134, + "time": 12.87 + }, + { + "epoch": 2.05, + "learning_rate": "1.7346e-04", + "loss": 0.7314, + "slid_loss": 0.8207, + "step": 2135, + "time": 11.38 + }, + { + "epoch": 2.05, + "learning_rate": "1.7343e-04", + "loss": 0.8021, + "slid_loss": 0.8203, + "step": 2136, + "time": 13.24 + }, + { + "epoch": 2.05, + "learning_rate": "1.7340e-04", + "loss": 0.8274, + "slid_loss": 0.8201, + "step": 2137, + "time": 13.4 + }, + { + "epoch": 2.05, + "learning_rate": "1.7337e-04", + "loss": 0.8239, + "slid_loss": 0.8193, + "step": 2138, + "time": 11.37 + }, + { + "epoch": 2.05, + "learning_rate": "1.7334e-04", + "loss": 0.7907, + "slid_loss": 0.8187, + "step": 2139, + "time": 14.03 + }, + { + "epoch": 2.06, + "learning_rate": "1.7331e-04", + "loss": 0.8522, + "slid_loss": 0.8184, + "step": 2140, + "time": 15.07 + }, + { + "epoch": 2.06, + "learning_rate": "1.7328e-04", + "loss": 0.7952, + "slid_loss": 0.8189, + "step": 2141, + "time": 13.05 + }, + { + "epoch": 2.06, + "learning_rate": "1.7325e-04", + "loss": 0.9281, + "slid_loss": 0.8206, + "step": 2142, + "time": 13.15 + }, + { + "epoch": 2.06, + "learning_rate": "1.7322e-04", + "loss": 0.7154, + "slid_loss": 0.82, + "step": 2143, + "time": 13.34 + }, + { + "epoch": 2.06, + "learning_rate": "1.7319e-04", + "loss": 0.7952, + "slid_loss": 0.8197, + "step": 2144, + "time": 13.74 + }, + { + "epoch": 2.06, + "learning_rate": "1.7316e-04", + "loss": 0.7374, + "slid_loss": 0.8197, + "step": 2145, + "time": 13.96 + }, + { + "epoch": 2.06, + "learning_rate": "1.7313e-04", + "loss": 0.7527, + "slid_loss": 0.8193, + "step": 2146, + "time": 13.58 + }, + { + "epoch": 2.06, + "learning_rate": "1.7310e-04", + "loss": 
0.8629, + "slid_loss": 0.8191, + "step": 2147, + "time": 13.6 + }, + { + "epoch": 2.06, + "learning_rate": "1.7307e-04", + "loss": 0.7596, + "slid_loss": 0.8186, + "step": 2148, + "time": 13.59 + }, + { + "epoch": 2.06, + "learning_rate": "1.7304e-04", + "loss": 0.8607, + "slid_loss": 0.8187, + "step": 2149, + "time": 14.03 + }, + { + "epoch": 2.07, + "learning_rate": "1.7301e-04", + "loss": 0.872, + "slid_loss": 0.8189, + "step": 2150, + "time": 12.81 + }, + { + "epoch": 2.07, + "learning_rate": "1.7298e-04", + "loss": 0.812, + "slid_loss": 0.8194, + "step": 2151, + "time": 13.13 + }, + { + "epoch": 2.07, + "learning_rate": "1.7295e-04", + "loss": 0.7477, + "slid_loss": 0.8179, + "step": 2152, + "time": 11.45 + }, + { + "epoch": 2.07, + "learning_rate": "1.7292e-04", + "loss": 0.8091, + "slid_loss": 0.8172, + "step": 2153, + "time": 13.62 + }, + { + "epoch": 2.07, + "learning_rate": "1.7289e-04", + "loss": 0.7644, + "slid_loss": 0.8161, + "step": 2154, + "time": 12.96 + }, + { + "epoch": 2.07, + "learning_rate": "1.7286e-04", + "loss": 0.7416, + "slid_loss": 0.8161, + "step": 2155, + "time": 13.67 + }, + { + "epoch": 2.07, + "learning_rate": "1.7283e-04", + "loss": 0.8153, + "slid_loss": 0.8153, + "step": 2156, + "time": 13.22 + }, + { + "epoch": 2.07, + "learning_rate": "1.7280e-04", + "loss": 0.7929, + "slid_loss": 0.8145, + "step": 2157, + "time": 13.6 + }, + { + "epoch": 2.07, + "learning_rate": "1.7277e-04", + "loss": 0.8095, + "slid_loss": 0.8147, + "step": 2158, + "time": 11.36 + }, + { + "epoch": 2.07, + "learning_rate": "1.7274e-04", + "loss": 0.7934, + "slid_loss": 0.8142, + "step": 2159, + "time": 13.69 + }, + { + "epoch": 2.07, + "learning_rate": "1.7271e-04", + "loss": 0.7191, + "slid_loss": 0.8132, + "step": 2160, + "time": 12.77 + }, + { + "epoch": 2.08, + "learning_rate": "1.7268e-04", + "loss": 0.7659, + "slid_loss": 0.8126, + "step": 2161, + "time": 13.67 + }, + { + "epoch": 2.08, + "learning_rate": "1.7265e-04", + "loss": 0.7851, + "slid_loss": 
0.8117, + "step": 2162, + "time": 13.77 + }, + { + "epoch": 2.08, + "learning_rate": "1.7262e-04", + "loss": 0.7829, + "slid_loss": 0.8117, + "step": 2163, + "time": 13.84 + }, + { + "epoch": 2.08, + "learning_rate": "1.7259e-04", + "loss": 0.7987, + "slid_loss": 0.8113, + "step": 2164, + "time": 12.88 + }, + { + "epoch": 2.08, + "learning_rate": "1.7256e-04", + "loss": 0.7274, + "slid_loss": 0.8103, + "step": 2165, + "time": 13.29 + }, + { + "epoch": 2.08, + "learning_rate": "1.7253e-04", + "loss": 0.8484, + "slid_loss": 0.8101, + "step": 2166, + "time": 14.36 + }, + { + "epoch": 2.08, + "learning_rate": "1.7250e-04", + "loss": 0.8566, + "slid_loss": 0.8106, + "step": 2167, + "time": 13.83 + }, + { + "epoch": 2.08, + "learning_rate": "1.7247e-04", + "loss": 0.7429, + "slid_loss": 0.8093, + "step": 2168, + "time": 14.29 + }, + { + "epoch": 2.08, + "learning_rate": "1.7244e-04", + "loss": 0.8994, + "slid_loss": 0.8097, + "step": 2169, + "time": 13.54 + }, + { + "epoch": 2.08, + "learning_rate": "1.7241e-04", + "loss": 0.8917, + "slid_loss": 0.8109, + "step": 2170, + "time": 13.52 + }, + { + "epoch": 2.09, + "learning_rate": "1.7238e-04", + "loss": 0.6748, + "slid_loss": 0.8092, + "step": 2171, + "time": 12.11 + }, + { + "epoch": 2.09, + "learning_rate": "1.7235e-04", + "loss": 0.8227, + "slid_loss": 0.8079, + "step": 2172, + "time": 13.4 + }, + { + "epoch": 2.09, + "learning_rate": "1.7232e-04", + "loss": 0.921, + "slid_loss": 0.8077, + "step": 2173, + "time": 13.69 + }, + { + "epoch": 2.09, + "learning_rate": "1.7229e-04", + "loss": 0.8645, + "slid_loss": 0.8074, + "step": 2174, + "time": 14.1 + }, + { + "epoch": 2.09, + "learning_rate": "1.7226e-04", + "loss": 0.8723, + "slid_loss": 0.8077, + "step": 2175, + "time": 13.22 + }, + { + "epoch": 2.09, + "learning_rate": "1.7223e-04", + "loss": 0.6827, + "slid_loss": 0.8074, + "step": 2176, + "time": 12.96 + }, + { + "epoch": 2.09, + "learning_rate": "1.7220e-04", + "loss": 0.8471, + "slid_loss": 0.8076, + "step": 
2177, + "time": 11.44 + }, + { + "epoch": 2.09, + "learning_rate": "1.7217e-04", + "loss": 0.7979, + "slid_loss": 0.8083, + "step": 2178, + "time": 12.96 + }, + { + "epoch": 2.09, + "learning_rate": "1.7214e-04", + "loss": 0.7906, + "slid_loss": 0.8071, + "step": 2179, + "time": 13.99 + }, + { + "epoch": 2.09, + "learning_rate": "1.7211e-04", + "loss": 0.8215, + "slid_loss": 0.808, + "step": 2180, + "time": 12.84 + }, + { + "epoch": 2.1, + "learning_rate": "1.7208e-04", + "loss": 0.7861, + "slid_loss": 0.8077, + "step": 2181, + "time": 13.3 + }, + { + "epoch": 2.1, + "learning_rate": "1.7205e-04", + "loss": 0.8479, + "slid_loss": 0.808, + "step": 2182, + "time": 13.1 + }, + { + "epoch": 2.1, + "learning_rate": "1.7202e-04", + "loss": 0.8295, + "slid_loss": 0.8077, + "step": 2183, + "time": 13.54 + }, + { + "epoch": 2.1, + "learning_rate": "1.7199e-04", + "loss": 0.8292, + "slid_loss": 0.8075, + "step": 2184, + "time": 12.72 + }, + { + "epoch": 2.1, + "learning_rate": "1.7196e-04", + "loss": 0.8243, + "slid_loss": 0.8077, + "step": 2185, + "time": 14.12 + }, + { + "epoch": 2.1, + "learning_rate": "1.7193e-04", + "loss": 0.8517, + "slid_loss": 0.8074, + "step": 2186, + "time": 13.01 + }, + { + "epoch": 2.1, + "learning_rate": "1.7190e-04", + "loss": 0.9026, + "slid_loss": 0.8079, + "step": 2187, + "time": 13.95 + }, + { + "epoch": 2.1, + "learning_rate": "1.7187e-04", + "loss": 0.701, + "slid_loss": 0.8063, + "step": 2188, + "time": 12.9 + }, + { + "epoch": 2.1, + "learning_rate": "1.7184e-04", + "loss": 0.7578, + "slid_loss": 0.8052, + "step": 2189, + "time": 13.78 + }, + { + "epoch": 2.1, + "learning_rate": "1.7181e-04", + "loss": 0.8242, + "slid_loss": 0.8052, + "step": 2190, + "time": 13.31 + }, + { + "epoch": 2.1, + "learning_rate": "1.7178e-04", + "loss": 0.8126, + "slid_loss": 0.8055, + "step": 2191, + "time": 12.31 + }, + { + "epoch": 2.11, + "learning_rate": "1.7175e-04", + "loss": 0.8094, + "slid_loss": 0.8064, + "step": 2192, + "time": 13.22 + }, + { + 
"epoch": 2.11, + "learning_rate": "1.7172e-04", + "loss": 0.768, + "slid_loss": 0.806, + "step": 2193, + "time": 13.59 + }, + { + "epoch": 2.11, + "learning_rate": "1.7169e-04", + "loss": 0.872, + "slid_loss": 0.8063, + "step": 2194, + "time": 13.26 + }, + { + "epoch": 2.11, + "learning_rate": "1.7166e-04", + "loss": 0.8102, + "slid_loss": 0.8065, + "step": 2195, + "time": 12.21 + }, + { + "epoch": 2.11, + "learning_rate": "1.7163e-04", + "loss": 0.7281, + "slid_loss": 0.8051, + "step": 2196, + "time": 13.83 + }, + { + "epoch": 2.11, + "learning_rate": "1.7160e-04", + "loss": 0.8073, + "slid_loss": 0.8043, + "step": 2197, + "time": 13.23 + }, + { + "epoch": 2.11, + "learning_rate": "1.7157e-04", + "loss": 0.855, + "slid_loss": 0.8044, + "step": 2198, + "time": 13.45 + }, + { + "epoch": 2.11, + "learning_rate": "1.7154e-04", + "loss": 0.872, + "slid_loss": 0.8054, + "step": 2199, + "time": 13.25 + }, + { + "epoch": 2.11, + "learning_rate": "1.7151e-04", + "loss": 0.6447, + "slid_loss": 0.8051, + "step": 2200, + "time": 11.85 + }, + { + "epoch": 2.11, + "learning_rate": "1.7148e-04", + "loss": 0.8157, + "slid_loss": 0.8044, + "step": 2201, + "time": 13.88 + }, + { + "epoch": 2.12, + "learning_rate": "1.7145e-04", + "loss": 0.8816, + "slid_loss": 0.8057, + "step": 2202, + "time": 14.14 + }, + { + "epoch": 2.12, + "learning_rate": "1.7142e-04", + "loss": 0.7806, + "slid_loss": 0.8057, + "step": 2203, + "time": 13.88 + }, + { + "epoch": 2.12, + "learning_rate": "1.7139e-04", + "loss": 0.8343, + "slid_loss": 0.8055, + "step": 2204, + "time": 14.29 + }, + { + "epoch": 2.12, + "learning_rate": "1.7136e-04", + "loss": 0.6909, + "slid_loss": 0.8031, + "step": 2205, + "time": 13.05 + }, + { + "epoch": 2.12, + "learning_rate": "1.7133e-04", + "loss": 0.8889, + "slid_loss": 0.8041, + "step": 2206, + "time": 13.97 + }, + { + "epoch": 2.12, + "learning_rate": "1.7130e-04", + "loss": 0.7157, + "slid_loss": 0.8033, + "step": 2207, + "time": 12.82 + }, + { + "epoch": 2.12, + 
"learning_rate": "1.7127e-04", + "loss": 0.7919, + "slid_loss": 0.8026, + "step": 2208, + "time": 11.83 + }, + { + "epoch": 2.12, + "learning_rate": "1.7124e-04", + "loss": 0.7788, + "slid_loss": 0.8019, + "step": 2209, + "time": 12.03 + }, + { + "epoch": 2.12, + "learning_rate": "1.7121e-04", + "loss": 0.8312, + "slid_loss": 0.802, + "step": 2210, + "time": 13.79 + }, + { + "epoch": 2.12, + "learning_rate": "1.7117e-04", + "loss": 0.8003, + "slid_loss": 0.8024, + "step": 2211, + "time": 11.86 + }, + { + "epoch": 2.12, + "learning_rate": "1.7114e-04", + "loss": 0.7627, + "slid_loss": 0.8018, + "step": 2212, + "time": 13.76 + }, + { + "epoch": 2.13, + "learning_rate": "1.7111e-04", + "loss": 0.793, + "slid_loss": 0.8023, + "step": 2213, + "time": 14.52 + }, + { + "epoch": 2.13, + "learning_rate": "1.7108e-04", + "loss": 0.7943, + "slid_loss": 0.8014, + "step": 2214, + "time": 13.0 + }, + { + "epoch": 2.13, + "learning_rate": "1.7105e-04", + "loss": 0.7813, + "slid_loss": 0.801, + "step": 2215, + "time": 13.3 + }, + { + "epoch": 2.13, + "learning_rate": "1.7102e-04", + "loss": 0.7085, + "slid_loss": 0.7999, + "step": 2216, + "time": 13.94 + }, + { + "epoch": 2.13, + "learning_rate": "1.7099e-04", + "loss": 0.8356, + "slid_loss": 0.7997, + "step": 2217, + "time": 13.12 + }, + { + "epoch": 2.13, + "learning_rate": "1.7096e-04", + "loss": 0.809, + "slid_loss": 0.8003, + "step": 2218, + "time": 13.59 + }, + { + "epoch": 2.13, + "learning_rate": "1.7093e-04", + "loss": 0.7378, + "slid_loss": 0.7998, + "step": 2219, + "time": 13.55 + }, + { + "epoch": 2.13, + "learning_rate": "1.7090e-04", + "loss": 0.7442, + "slid_loss": 0.7998, + "step": 2220, + "time": 13.57 + }, + { + "epoch": 2.13, + "learning_rate": "1.7087e-04", + "loss": 0.7218, + "slid_loss": 0.7992, + "step": 2221, + "time": 12.79 + }, + { + "epoch": 2.13, + "learning_rate": "1.7084e-04", + "loss": 0.8291, + "slid_loss": 0.7988, + "step": 2222, + "time": 12.76 + }, + { + "epoch": 2.14, + "learning_rate": 
"1.7081e-04", + "loss": 0.7834, + "slid_loss": 0.7984, + "step": 2223, + "time": 12.8 + }, + { + "epoch": 2.14, + "learning_rate": "1.7078e-04", + "loss": 0.7405, + "slid_loss": 0.7967, + "step": 2224, + "time": 14.2 + }, + { + "epoch": 2.14, + "learning_rate": "1.7075e-04", + "loss": 0.7425, + "slid_loss": 0.797, + "step": 2225, + "time": 13.25 + }, + { + "epoch": 2.14, + "learning_rate": "1.7072e-04", + "loss": 0.7462, + "slid_loss": 0.7974, + "step": 2226, + "time": 13.13 + }, + { + "epoch": 2.14, + "learning_rate": "1.7069e-04", + "loss": 0.8373, + "slid_loss": 0.7978, + "step": 2227, + "time": 13.85 + }, + { + "epoch": 2.14, + "learning_rate": "1.7066e-04", + "loss": 0.7493, + "slid_loss": 0.7976, + "step": 2228, + "time": 13.57 + }, + { + "epoch": 2.14, + "learning_rate": "1.7063e-04", + "loss": 0.8001, + "slid_loss": 0.7978, + "step": 2229, + "time": 12.41 + }, + { + "epoch": 2.14, + "learning_rate": "1.7060e-04", + "loss": 0.7578, + "slid_loss": 0.7984, + "step": 2230, + "time": 13.46 + }, + { + "epoch": 2.14, + "learning_rate": "1.7057e-04", + "loss": 0.8109, + "slid_loss": 0.7985, + "step": 2231, + "time": 12.2 + }, + { + "epoch": 2.14, + "learning_rate": "1.7053e-04", + "loss": 0.7628, + "slid_loss": 0.7976, + "step": 2232, + "time": 13.15 + }, + { + "epoch": 2.15, + "learning_rate": "1.7050e-04", + "loss": 0.8082, + "slid_loss": 0.7972, + "step": 2233, + "time": 13.09 + }, + { + "epoch": 2.15, + "learning_rate": "1.7047e-04", + "loss": 0.8269, + "slid_loss": 0.7973, + "step": 2234, + "time": 13.69 + }, + { + "epoch": 2.15, + "learning_rate": "1.7044e-04", + "loss": 0.7934, + "slid_loss": 0.7979, + "step": 2235, + "time": 13.86 + }, + { + "epoch": 2.15, + "learning_rate": "1.7041e-04", + "loss": 0.7865, + "slid_loss": 0.7978, + "step": 2236, + "time": 13.57 + }, + { + "epoch": 2.15, + "learning_rate": "1.7038e-04", + "loss": 0.7845, + "slid_loss": 0.7974, + "step": 2237, + "time": 14.49 + }, + { + "epoch": 2.15, + "learning_rate": "1.7035e-04", + "loss": 
0.7661, + "slid_loss": 0.7968, + "step": 2238, + "time": 13.6 + }, + { + "epoch": 2.15, + "learning_rate": "1.7032e-04", + "loss": 0.7414, + "slid_loss": 0.7963, + "step": 2239, + "time": 12.2 + }, + { + "epoch": 2.15, + "learning_rate": "1.7029e-04", + "loss": 0.8403, + "slid_loss": 0.7962, + "step": 2240, + "time": 14.26 + }, + { + "epoch": 2.15, + "learning_rate": "1.7026e-04", + "loss": 0.7392, + "slid_loss": 0.7956, + "step": 2241, + "time": 13.82 + }, + { + "epoch": 2.15, + "learning_rate": "1.7023e-04", + "loss": 0.8023, + "slid_loss": 0.7944, + "step": 2242, + "time": 13.34 + }, + { + "epoch": 2.15, + "learning_rate": "1.7020e-04", + "loss": 0.8651, + "slid_loss": 0.7959, + "step": 2243, + "time": 13.52 + }, + { + "epoch": 2.16, + "learning_rate": "1.7017e-04", + "loss": 0.8172, + "slid_loss": 0.7961, + "step": 2244, + "time": 12.9 + }, + { + "epoch": 2.16, + "learning_rate": "1.7014e-04", + "loss": 0.8359, + "slid_loss": 0.7971, + "step": 2245, + "time": 12.85 + }, + { + "epoch": 2.16, + "learning_rate": "1.7011e-04", + "loss": 0.7136, + "slid_loss": 0.7967, + "step": 2246, + "time": 13.47 + }, + { + "epoch": 2.16, + "learning_rate": "1.7008e-04", + "loss": 0.7401, + "slid_loss": 0.7954, + "step": 2247, + "time": 13.65 + }, + { + "epoch": 2.16, + "learning_rate": "1.7004e-04", + "loss": 0.8518, + "slid_loss": 0.7964, + "step": 2248, + "time": 13.91 + }, + { + "epoch": 2.16, + "learning_rate": "1.7001e-04", + "loss": 0.7898, + "slid_loss": 0.7957, + "step": 2249, + "time": 13.07 + }, + { + "epoch": 2.16, + "learning_rate": "1.6998e-04", + "loss": 0.8022, + "slid_loss": 0.795, + "step": 2250, + "time": 13.7 + }, + { + "epoch": 2.16, + "learning_rate": "1.6995e-04", + "loss": 0.8057, + "slid_loss": 0.7949, + "step": 2251, + "time": 14.76 + }, + { + "epoch": 2.16, + "learning_rate": "1.6992e-04", + "loss": 0.7467, + "slid_loss": 0.7949, + "step": 2252, + "time": 13.65 + }, + { + "epoch": 2.16, + "learning_rate": "1.6989e-04", + "loss": 0.7295, + "slid_loss": 
0.7941, + "step": 2253, + "time": 13.11 + }, + { + "epoch": 2.17, + "learning_rate": "1.6986e-04", + "loss": 0.8899, + "slid_loss": 0.7953, + "step": 2254, + "time": 11.4 + }, + { + "epoch": 2.17, + "learning_rate": "1.6983e-04", + "loss": 0.7359, + "slid_loss": 0.7953, + "step": 2255, + "time": 13.13 + }, + { + "epoch": 2.17, + "learning_rate": "1.6980e-04", + "loss": 0.8267, + "slid_loss": 0.7954, + "step": 2256, + "time": 14.08 + }, + { + "epoch": 2.17, + "learning_rate": "1.6977e-04", + "loss": 0.823, + "slid_loss": 0.7957, + "step": 2257, + "time": 13.38 + }, + { + "epoch": 2.17, + "learning_rate": "1.6974e-04", + "loss": 0.7864, + "slid_loss": 0.7955, + "step": 2258, + "time": 13.72 + }, + { + "epoch": 2.17, + "learning_rate": "1.6971e-04", + "loss": 0.8135, + "slid_loss": 0.7957, + "step": 2259, + "time": 12.36 + }, + { + "epoch": 2.17, + "learning_rate": "1.6968e-04", + "loss": 0.8588, + "slid_loss": 0.7971, + "step": 2260, + "time": 12.41 + }, + { + "epoch": 2.17, + "learning_rate": "1.6964e-04", + "loss": 0.9041, + "slid_loss": 0.7984, + "step": 2261, + "time": 13.84 + }, + { + "epoch": 2.17, + "learning_rate": "1.6961e-04", + "loss": 0.7461, + "slid_loss": 0.7981, + "step": 2262, + "time": 12.28 + }, + { + "epoch": 2.17, + "learning_rate": "1.6958e-04", + "loss": 0.783, + "slid_loss": 0.7981, + "step": 2263, + "time": 12.81 + }, + { + "epoch": 2.17, + "learning_rate": "1.6955e-04", + "loss": 0.8091, + "slid_loss": 0.7982, + "step": 2264, + "time": 13.98 + }, + { + "epoch": 2.18, + "learning_rate": "1.6952e-04", + "loss": 0.7466, + "slid_loss": 0.7984, + "step": 2265, + "time": 13.11 + }, + { + "epoch": 2.18, + "learning_rate": "1.6949e-04", + "loss": 0.815, + "slid_loss": 0.798, + "step": 2266, + "time": 14.01 + }, + { + "epoch": 2.18, + "learning_rate": "1.6946e-04", + "loss": 0.7752, + "slid_loss": 0.7972, + "step": 2267, + "time": 13.44 + }, + { + "epoch": 2.18, + "learning_rate": "1.6943e-04", + "loss": 0.8469, + "slid_loss": 0.7982, + "step": 2268, 
+ "time": 11.55 + }, + { + "epoch": 2.18, + "learning_rate": "1.6940e-04", + "loss": 0.6493, + "slid_loss": 0.7957, + "step": 2269, + "time": 12.92 + }, + { + "epoch": 2.18, + "learning_rate": "1.6937e-04", + "loss": 0.9232, + "slid_loss": 0.7961, + "step": 2270, + "time": 13.65 + }, + { + "epoch": 2.18, + "learning_rate": "1.6934e-04", + "loss": 0.8403, + "slid_loss": 0.7977, + "step": 2271, + "time": 12.43 + }, + { + "epoch": 2.18, + "learning_rate": "1.6930e-04", + "loss": 0.7851, + "slid_loss": 0.7973, + "step": 2272, + "time": 11.77 + }, + { + "epoch": 2.18, + "learning_rate": "1.6927e-04", + "loss": 0.837, + "slid_loss": 0.7965, + "step": 2273, + "time": 13.64 + }, + { + "epoch": 2.18, + "learning_rate": "1.6924e-04", + "loss": 0.8088, + "slid_loss": 0.7959, + "step": 2274, + "time": 12.15 + }, + { + "epoch": 2.19, + "learning_rate": "1.6921e-04", + "loss": 0.8475, + "slid_loss": 0.7957, + "step": 2275, + "time": 12.3 + }, + { + "epoch": 2.19, + "learning_rate": "1.6918e-04", + "loss": 0.8388, + "slid_loss": 0.7973, + "step": 2276, + "time": 12.1 + }, + { + "epoch": 2.19, + "learning_rate": "1.6915e-04", + "loss": 0.6712, + "slid_loss": 0.7955, + "step": 2277, + "time": 13.59 + }, + { + "epoch": 2.19, + "learning_rate": "1.6912e-04", + "loss": 0.7821, + "slid_loss": 0.7953, + "step": 2278, + "time": 13.2 + }, + { + "epoch": 2.19, + "learning_rate": "1.6909e-04", + "loss": 0.7661, + "slid_loss": 0.7951, + "step": 2279, + "time": 13.25 + }, + { + "epoch": 2.19, + "learning_rate": "1.6906e-04", + "loss": 0.6763, + "slid_loss": 0.7936, + "step": 2280, + "time": 13.74 + }, + { + "epoch": 2.19, + "learning_rate": "1.6903e-04", + "loss": 0.8165, + "slid_loss": 0.7939, + "step": 2281, + "time": 13.52 + }, + { + "epoch": 2.19, + "learning_rate": "1.6899e-04", + "loss": 0.7703, + "slid_loss": 0.7932, + "step": 2282, + "time": 13.82 + }, + { + "epoch": 2.19, + "learning_rate": "1.6896e-04", + "loss": 0.7351, + "slid_loss": 0.7922, + "step": 2283, + "time": 11.87 + }, + 
{ + "epoch": 2.19, + "learning_rate": "1.6893e-04", + "loss": 0.6802, + "slid_loss": 0.7907, + "step": 2284, + "time": 14.03 + }, + { + "epoch": 2.2, + "learning_rate": "1.6890e-04", + "loss": 0.8116, + "slid_loss": 0.7906, + "step": 2285, + "time": 11.56 + }, + { + "epoch": 2.2, + "learning_rate": "1.6887e-04", + "loss": 0.7986, + "slid_loss": 0.7901, + "step": 2286, + "time": 13.35 + }, + { + "epoch": 2.2, + "learning_rate": "1.6884e-04", + "loss": 0.867, + "slid_loss": 0.7897, + "step": 2287, + "time": 14.06 + }, + { + "epoch": 2.2, + "learning_rate": "1.6881e-04", + "loss": 0.7727, + "slid_loss": 0.7904, + "step": 2288, + "time": 12.77 + }, + { + "epoch": 2.2, + "learning_rate": "1.6878e-04", + "loss": 0.7279, + "slid_loss": 0.7901, + "step": 2289, + "time": 13.43 + }, + { + "epoch": 2.2, + "learning_rate": "1.6875e-04", + "loss": 0.8506, + "slid_loss": 0.7904, + "step": 2290, + "time": 13.79 + }, + { + "epoch": 2.2, + "learning_rate": "1.6872e-04", + "loss": 0.8056, + "slid_loss": 0.7903, + "step": 2291, + "time": 13.3 + }, + { + "epoch": 2.2, + "learning_rate": "1.6868e-04", + "loss": 0.8424, + "slid_loss": 0.7907, + "step": 2292, + "time": 13.4 + }, + { + "epoch": 2.2, + "learning_rate": "1.6865e-04", + "loss": 0.7595, + "slid_loss": 0.7906, + "step": 2293, + "time": 13.26 + }, + { + "epoch": 2.2, + "learning_rate": "1.6862e-04", + "loss": 0.695, + "slid_loss": 0.7888, + "step": 2294, + "time": 13.87 + }, + { + "epoch": 2.2, + "learning_rate": "1.6859e-04", + "loss": 0.7358, + "slid_loss": 0.7881, + "step": 2295, + "time": 12.29 + }, + { + "epoch": 2.21, + "learning_rate": "1.6856e-04", + "loss": 0.8463, + "slid_loss": 0.7892, + "step": 2296, + "time": 12.86 + }, + { + "epoch": 2.21, + "learning_rate": "1.6853e-04", + "loss": 0.7959, + "slid_loss": 0.7891, + "step": 2297, + "time": 11.48 + }, + { + "epoch": 2.21, + "learning_rate": "1.6850e-04", + "loss": 0.839, + "slid_loss": 0.789, + "step": 2298, + "time": 12.77 + }, + { + "epoch": 2.21, + 
"learning_rate": "1.6847e-04", + "loss": 0.8562, + "slid_loss": 0.7888, + "step": 2299, + "time": 13.41 + }, + { + "epoch": 2.21, + "learning_rate": "1.6843e-04", + "loss": 0.7578, + "slid_loss": 0.7899, + "step": 2300, + "time": 13.22 + }, + { + "epoch": 2.21, + "learning_rate": "1.6840e-04", + "loss": 0.7658, + "slid_loss": 0.7894, + "step": 2301, + "time": 11.97 + }, + { + "epoch": 2.21, + "learning_rate": "1.6837e-04", + "loss": 0.7181, + "slid_loss": 0.7878, + "step": 2302, + "time": 12.84 + }, + { + "epoch": 2.21, + "learning_rate": "1.6834e-04", + "loss": 0.8497, + "slid_loss": 0.7885, + "step": 2303, + "time": 13.41 + }, + { + "epoch": 2.21, + "learning_rate": "1.6831e-04", + "loss": 0.8063, + "slid_loss": 0.7882, + "step": 2304, + "time": 10.88 + }, + { + "epoch": 2.21, + "learning_rate": "1.6828e-04", + "loss": 0.7609, + "slid_loss": 0.7889, + "step": 2305, + "time": 13.8 + }, + { + "epoch": 2.22, + "learning_rate": "1.6825e-04", + "loss": 0.767, + "slid_loss": 0.7877, + "step": 2306, + "time": 10.76 + }, + { + "epoch": 2.22, + "learning_rate": "1.6822e-04", + "loss": 0.8057, + "slid_loss": 0.7886, + "step": 2307, + "time": 12.87 + }, + { + "epoch": 2.22, + "learning_rate": "1.6819e-04", + "loss": 0.8459, + "slid_loss": 0.7891, + "step": 2308, + "time": 13.4 + }, + { + "epoch": 2.22, + "learning_rate": "1.6815e-04", + "loss": 0.8029, + "slid_loss": 0.7894, + "step": 2309, + "time": 12.63 + }, + { + "epoch": 2.22, + "learning_rate": "1.6812e-04", + "loss": 0.783, + "slid_loss": 0.7889, + "step": 2310, + "time": 13.76 + }, + { + "epoch": 2.22, + "learning_rate": "1.6809e-04", + "loss": 0.7074, + "slid_loss": 0.788, + "step": 2311, + "time": 13.86 + }, + { + "epoch": 2.22, + "learning_rate": "1.6806e-04", + "loss": 0.8522, + "slid_loss": 0.7889, + "step": 2312, + "time": 14.04 + }, + { + "epoch": 2.22, + "learning_rate": "1.6803e-04", + "loss": 0.7792, + "slid_loss": 0.7887, + "step": 2313, + "time": 12.84 + }, + { + "epoch": 2.22, + "learning_rate": 
"1.6800e-04", + "loss": 0.7522, + "slid_loss": 0.7883, + "step": 2314, + "time": 13.65 + }, + { + "epoch": 2.22, + "learning_rate": "1.6797e-04", + "loss": 0.8422, + "slid_loss": 0.7889, + "step": 2315, + "time": 11.81 + }, + { + "epoch": 2.22, + "learning_rate": "1.6794e-04", + "loss": 0.8003, + "slid_loss": 0.7898, + "step": 2316, + "time": 13.99 + }, + { + "epoch": 2.23, + "learning_rate": "1.6790e-04", + "loss": 0.8567, + "slid_loss": 0.79, + "step": 2317, + "time": 12.72 + }, + { + "epoch": 2.23, + "learning_rate": "1.6787e-04", + "loss": 0.7813, + "slid_loss": 0.7898, + "step": 2318, + "time": 11.83 + }, + { + "epoch": 2.23, + "learning_rate": "1.6784e-04", + "loss": 0.8504, + "slid_loss": 0.7909, + "step": 2319, + "time": 13.67 + }, + { + "epoch": 2.23, + "learning_rate": "1.6781e-04", + "loss": 0.8548, + "slid_loss": 0.792, + "step": 2320, + "time": 13.38 + }, + { + "epoch": 2.23, + "learning_rate": "1.6778e-04", + "loss": 0.7591, + "slid_loss": 0.7924, + "step": 2321, + "time": 12.87 + }, + { + "epoch": 2.23, + "learning_rate": "1.6775e-04", + "loss": 0.7466, + "slid_loss": 0.7916, + "step": 2322, + "time": 12.48 + }, + { + "epoch": 2.23, + "learning_rate": "1.6772e-04", + "loss": 0.8087, + "slid_loss": 0.7918, + "step": 2323, + "time": 13.92 + }, + { + "epoch": 2.23, + "learning_rate": "1.6768e-04", + "loss": 0.7242, + "slid_loss": 0.7916, + "step": 2324, + "time": 12.92 + }, + { + "epoch": 2.23, + "learning_rate": "1.6765e-04", + "loss": 0.7763, + "slid_loss": 0.792, + "step": 2325, + "time": 13.39 + }, + { + "epoch": 2.23, + "learning_rate": "1.6762e-04", + "loss": 0.7529, + "slid_loss": 0.792, + "step": 2326, + "time": 13.01 + }, + { + "epoch": 2.24, + "learning_rate": "1.6759e-04", + "loss": 0.8644, + "slid_loss": 0.7923, + "step": 2327, + "time": 13.58 + }, + { + "epoch": 2.24, + "learning_rate": "1.6756e-04", + "loss": 0.7668, + "slid_loss": 0.7925, + "step": 2328, + "time": 11.64 + }, + { + "epoch": 2.24, + "learning_rate": "1.6753e-04", + "loss": 
0.8452, + "slid_loss": 0.7929, + "step": 2329, + "time": 12.78 + }, + { + "epoch": 2.24, + "learning_rate": "1.6750e-04", + "loss": 0.7508, + "slid_loss": 0.7929, + "step": 2330, + "time": 12.93 + }, + { + "epoch": 2.24, + "learning_rate": "1.6746e-04", + "loss": 0.6384, + "slid_loss": 0.7911, + "step": 2331, + "time": 11.64 + }, + { + "epoch": 2.24, + "learning_rate": "1.6743e-04", + "loss": 0.776, + "slid_loss": 0.7913, + "step": 2332, + "time": 13.33 + }, + { + "epoch": 2.24, + "learning_rate": "1.6740e-04", + "loss": 0.8438, + "slid_loss": 0.7916, + "step": 2333, + "time": 12.15 + }, + { + "epoch": 2.24, + "learning_rate": "1.6737e-04", + "loss": 0.7734, + "slid_loss": 0.7911, + "step": 2334, + "time": 12.84 + }, + { + "epoch": 2.24, + "learning_rate": "1.6734e-04", + "loss": 0.7763, + "slid_loss": 0.7909, + "step": 2335, + "time": 13.36 + }, + { + "epoch": 2.24, + "learning_rate": "1.6731e-04", + "loss": 0.7231, + "slid_loss": 0.7903, + "step": 2336, + "time": 13.56 + }, + { + "epoch": 2.24, + "learning_rate": "1.6728e-04", + "loss": 0.8055, + "slid_loss": 0.7905, + "step": 2337, + "time": 12.52 + }, + { + "epoch": 2.25, + "learning_rate": "1.6724e-04", + "loss": 0.7972, + "slid_loss": 0.7908, + "step": 2338, + "time": 13.48 + }, + { + "epoch": 2.25, + "learning_rate": "1.6721e-04", + "loss": 0.7652, + "slid_loss": 0.7911, + "step": 2339, + "time": 13.69 + }, + { + "epoch": 2.25, + "learning_rate": "1.6718e-04", + "loss": 0.778, + "slid_loss": 0.7904, + "step": 2340, + "time": 12.3 + }, + { + "epoch": 2.25, + "learning_rate": "1.6715e-04", + "loss": 0.7265, + "slid_loss": 0.7903, + "step": 2341, + "time": 13.1 + }, + { + "epoch": 2.25, + "learning_rate": "1.6712e-04", + "loss": 0.8467, + "slid_loss": 0.7907, + "step": 2342, + "time": 14.03 + }, + { + "epoch": 2.25, + "learning_rate": "1.6709e-04", + "loss": 0.7228, + "slid_loss": 0.7893, + "step": 2343, + "time": 13.65 + }, + { + "epoch": 2.25, + "learning_rate": "1.6706e-04", + "loss": 0.7076, + "slid_loss": 
0.7882, + "step": 2344, + "time": 12.87 + }, + { + "epoch": 2.25, + "learning_rate": "1.6702e-04", + "loss": 0.8349, + "slid_loss": 0.7882, + "step": 2345, + "time": 13.15 + }, + { + "epoch": 2.25, + "learning_rate": "1.6699e-04", + "loss": 0.8123, + "slid_loss": 0.7892, + "step": 2346, + "time": 13.2 + }, + { + "epoch": 2.25, + "learning_rate": "1.6696e-04", + "loss": 0.7886, + "slid_loss": 0.7897, + "step": 2347, + "time": 13.72 + }, + { + "epoch": 2.26, + "learning_rate": "1.6693e-04", + "loss": 0.7735, + "slid_loss": 0.7889, + "step": 2348, + "time": 11.02 + }, + { + "epoch": 2.26, + "learning_rate": "1.6690e-04", + "loss": 0.8651, + "slid_loss": 0.7897, + "step": 2349, + "time": 12.83 + }, + { + "epoch": 2.26, + "learning_rate": "1.6687e-04", + "loss": 0.7521, + "slid_loss": 0.7892, + "step": 2350, + "time": 12.67 + }, + { + "epoch": 2.26, + "learning_rate": "1.6683e-04", + "loss": 0.7954, + "slid_loss": 0.7891, + "step": 2351, + "time": 12.06 + }, + { + "epoch": 2.26, + "learning_rate": "1.6680e-04", + "loss": 0.8034, + "slid_loss": 0.7896, + "step": 2352, + "time": 12.2 + }, + { + "epoch": 2.26, + "learning_rate": "1.6677e-04", + "loss": 0.7668, + "slid_loss": 0.79, + "step": 2353, + "time": 13.38 + }, + { + "epoch": 2.26, + "learning_rate": "1.6674e-04", + "loss": 0.7665, + "slid_loss": 0.7888, + "step": 2354, + "time": 12.89 + }, + { + "epoch": 2.26, + "learning_rate": "1.6671e-04", + "loss": 0.7346, + "slid_loss": 0.7887, + "step": 2355, + "time": 13.14 + }, + { + "epoch": 2.26, + "learning_rate": "1.6668e-04", + "loss": 0.8758, + "slid_loss": 0.7892, + "step": 2356, + "time": 12.94 + }, + { + "epoch": 2.26, + "learning_rate": "1.6665e-04", + "loss": 0.8132, + "slid_loss": 0.7891, + "step": 2357, + "time": 13.35 + }, + { + "epoch": 2.27, + "learning_rate": "1.6661e-04", + "loss": 0.7162, + "slid_loss": 0.7884, + "step": 2358, + "time": 13.57 + }, + { + "epoch": 2.27, + "learning_rate": "1.6658e-04", + "loss": 0.8178, + "slid_loss": 0.7885, + "step": 2359, 
+ "time": 11.26 + }, + { + "epoch": 2.27, + "learning_rate": "1.6655e-04", + "loss": 0.7531, + "slid_loss": 0.7874, + "step": 2360, + "time": 13.53 + }, + { + "epoch": 2.27, + "learning_rate": "1.6652e-04", + "loss": 0.8243, + "slid_loss": 0.7866, + "step": 2361, + "time": 13.28 + }, + { + "epoch": 2.27, + "learning_rate": "1.6649e-04", + "loss": 0.7408, + "slid_loss": 0.7866, + "step": 2362, + "time": 14.02 + }, + { + "epoch": 2.27, + "learning_rate": "1.6646e-04", + "loss": 0.7641, + "slid_loss": 0.7864, + "step": 2363, + "time": 14.03 + }, + { + "epoch": 2.27, + "learning_rate": "1.6642e-04", + "loss": 0.7551, + "slid_loss": 0.7858, + "step": 2364, + "time": 13.84 + }, + { + "epoch": 2.27, + "learning_rate": "1.6639e-04", + "loss": 0.8734, + "slid_loss": 0.7871, + "step": 2365, + "time": 11.24 + }, + { + "epoch": 2.27, + "learning_rate": "1.6636e-04", + "loss": 0.7956, + "slid_loss": 0.7869, + "step": 2366, + "time": 13.12 + }, + { + "epoch": 2.27, + "learning_rate": "1.6633e-04", + "loss": 0.7144, + "slid_loss": 0.7863, + "step": 2367, + "time": 11.58 + }, + { + "epoch": 2.27, + "learning_rate": "1.6630e-04", + "loss": 0.74, + "slid_loss": 0.7852, + "step": 2368, + "time": 13.49 + }, + { + "epoch": 2.28, + "learning_rate": "1.6627e-04", + "loss": 0.8097, + "slid_loss": 0.7868, + "step": 2369, + "time": 13.67 + }, + { + "epoch": 2.28, + "learning_rate": "1.6623e-04", + "loss": 0.7571, + "slid_loss": 0.7852, + "step": 2370, + "time": 13.92 + }, + { + "epoch": 2.28, + "learning_rate": "1.6620e-04", + "loss": 0.7708, + "slid_loss": 0.7845, + "step": 2371, + "time": 13.42 + }, + { + "epoch": 2.28, + "learning_rate": "1.6617e-04", + "loss": 0.8583, + "slid_loss": 0.7852, + "step": 2372, + "time": 13.91 + }, + { + "epoch": 2.28, + "learning_rate": "1.6614e-04", + "loss": 0.65, + "slid_loss": 0.7834, + "step": 2373, + "time": 12.26 + }, + { + "epoch": 2.28, + "learning_rate": "1.6611e-04", + "loss": 0.6606, + "slid_loss": 0.7819, + "step": 2374, + "time": 14.12 + }, + 
{ + "epoch": 2.28, + "learning_rate": "1.6607e-04", + "loss": 0.7866, + "slid_loss": 0.7813, + "step": 2375, + "time": 13.72 + }, + { + "epoch": 2.28, + "learning_rate": "1.6604e-04", + "loss": 0.7201, + "slid_loss": 0.7801, + "step": 2376, + "time": 12.76 + }, + { + "epoch": 2.28, + "learning_rate": "1.6601e-04", + "loss": 0.7036, + "slid_loss": 0.7804, + "step": 2377, + "time": 11.71 + }, + { + "epoch": 2.28, + "learning_rate": "1.6598e-04", + "loss": 0.6641, + "slid_loss": 0.7792, + "step": 2378, + "time": 14.01 + }, + { + "epoch": 2.29, + "learning_rate": "1.6595e-04", + "loss": 0.707, + "slid_loss": 0.7786, + "step": 2379, + "time": 12.15 + }, + { + "epoch": 2.29, + "learning_rate": "1.6592e-04", + "loss": 0.7834, + "slid_loss": 0.7797, + "step": 2380, + "time": 13.33 + }, + { + "epoch": 2.29, + "learning_rate": "1.6588e-04", + "loss": 0.719, + "slid_loss": 0.7787, + "step": 2381, + "time": 13.88 + }, + { + "epoch": 2.29, + "learning_rate": "1.6585e-04", + "loss": 0.8067, + "slid_loss": 0.7791, + "step": 2382, + "time": 15.35 + }, + { + "epoch": 2.29, + "learning_rate": "1.6582e-04", + "loss": 0.8457, + "slid_loss": 0.7802, + "step": 2383, + "time": 12.92 + }, + { + "epoch": 2.29, + "learning_rate": "1.6579e-04", + "loss": 0.7335, + "slid_loss": 0.7807, + "step": 2384, + "time": 14.3 + }, + { + "epoch": 2.29, + "learning_rate": "1.6576e-04", + "loss": 0.7939, + "slid_loss": 0.7806, + "step": 2385, + "time": 12.65 + }, + { + "epoch": 2.29, + "learning_rate": "1.6572e-04", + "loss": 0.8367, + "slid_loss": 0.7809, + "step": 2386, + "time": 13.37 + }, + { + "epoch": 2.29, + "learning_rate": "1.6569e-04", + "loss": 0.7839, + "slid_loss": 0.7801, + "step": 2387, + "time": 13.19 + }, + { + "epoch": 2.29, + "learning_rate": "1.6566e-04", + "loss": 0.6895, + "slid_loss": 0.7793, + "step": 2388, + "time": 12.85 + }, + { + "epoch": 2.29, + "learning_rate": "1.6563e-04", + "loss": 0.7283, + "slid_loss": 0.7793, + "step": 2389, + "time": 12.35 + }, + { + "epoch": 2.3, + 
"learning_rate": "1.6560e-04", + "loss": 0.7126, + "slid_loss": 0.7779, + "step": 2390, + "time": 13.61 + }, + { + "epoch": 2.3, + "learning_rate": "1.6557e-04", + "loss": 0.7547, + "slid_loss": 0.7774, + "step": 2391, + "time": 12.68 + }, + { + "epoch": 2.3, + "learning_rate": "1.6553e-04", + "loss": 0.9244, + "slid_loss": 0.7782, + "step": 2392, + "time": 13.96 + }, + { + "epoch": 2.3, + "learning_rate": "1.6550e-04", + "loss": 0.8088, + "slid_loss": 0.7787, + "step": 2393, + "time": 13.87 + }, + { + "epoch": 2.3, + "learning_rate": "1.6547e-04", + "loss": 0.7744, + "slid_loss": 0.7795, + "step": 2394, + "time": 12.89 + }, + { + "epoch": 2.3, + "learning_rate": "1.6544e-04", + "loss": 0.6976, + "slid_loss": 0.7791, + "step": 2395, + "time": 13.27 + }, + { + "epoch": 2.3, + "learning_rate": "1.6541e-04", + "loss": 0.7995, + "slid_loss": 0.7786, + "step": 2396, + "time": 13.11 + }, + { + "epoch": 2.3, + "learning_rate": "1.6537e-04", + "loss": 0.78, + "slid_loss": 0.7785, + "step": 2397, + "time": 13.87 + }, + { + "epoch": 2.3, + "learning_rate": "1.6534e-04", + "loss": 0.7534, + "slid_loss": 0.7776, + "step": 2398, + "time": 12.73 + }, + { + "epoch": 2.3, + "learning_rate": "1.6531e-04", + "loss": 0.749, + "slid_loss": 0.7766, + "step": 2399, + "time": 13.4 + }, + { + "epoch": 2.31, + "learning_rate": "1.6528e-04", + "loss": 0.8434, + "slid_loss": 0.7774, + "step": 2400, + "time": 13.12 + }, + { + "epoch": 2.31, + "learning_rate": "1.6525e-04", + "loss": 0.8167, + "slid_loss": 0.7779, + "step": 2401, + "time": 11.93 + }, + { + "epoch": 2.31, + "learning_rate": "1.6521e-04", + "loss": 0.7369, + "slid_loss": 0.7781, + "step": 2402, + "time": 13.65 + }, + { + "epoch": 2.31, + "learning_rate": "1.6518e-04", + "loss": 0.7046, + "slid_loss": 0.7767, + "step": 2403, + "time": 13.19 + }, + { + "epoch": 2.31, + "learning_rate": "1.6515e-04", + "loss": 0.8158, + "slid_loss": 0.7767, + "step": 2404, + "time": 13.11 + }, + { + "epoch": 2.31, + "learning_rate": "1.6512e-04", + 
"loss": 0.8183, + "slid_loss": 0.7773, + "step": 2405, + "time": 13.58 + }, + { + "epoch": 2.31, + "learning_rate": "1.6509e-04", + "loss": 0.7396, + "slid_loss": 0.777, + "step": 2406, + "time": 13.9 + }, + { + "epoch": 2.31, + "learning_rate": "1.6505e-04", + "loss": 0.8199, + "slid_loss": 0.7772, + "step": 2407, + "time": 14.2 + }, + { + "epoch": 2.31, + "learning_rate": "1.6502e-04", + "loss": 0.7895, + "slid_loss": 0.7766, + "step": 2408, + "time": 12.91 + }, + { + "epoch": 2.31, + "learning_rate": "1.6499e-04", + "loss": 0.7928, + "slid_loss": 0.7765, + "step": 2409, + "time": 13.44 + }, + { + "epoch": 2.32, + "learning_rate": "1.6496e-04", + "loss": 0.7698, + "slid_loss": 0.7764, + "step": 2410, + "time": 14.13 + }, + { + "epoch": 2.32, + "learning_rate": "1.6493e-04", + "loss": 0.8022, + "slid_loss": 0.7773, + "step": 2411, + "time": 11.31 + }, + { + "epoch": 2.32, + "learning_rate": "1.6489e-04", + "loss": 0.7736, + "slid_loss": 0.7766, + "step": 2412, + "time": 12.9 + }, + { + "epoch": 2.32, + "learning_rate": "1.6486e-04", + "loss": 0.783, + "slid_loss": 0.7766, + "step": 2413, + "time": 12.88 + }, + { + "epoch": 2.32, + "learning_rate": "1.6483e-04", + "loss": 0.7696, + "slid_loss": 0.7768, + "step": 2414, + "time": 12.81 + }, + { + "epoch": 2.32, + "learning_rate": "1.6480e-04", + "loss": 0.6847, + "slid_loss": 0.7752, + "step": 2415, + "time": 13.18 + }, + { + "epoch": 2.32, + "learning_rate": "1.6477e-04", + "loss": 0.7745, + "slid_loss": 0.7749, + "step": 2416, + "time": 13.22 + }, + { + "epoch": 2.32, + "learning_rate": "1.6473e-04", + "loss": 0.8408, + "slid_loss": 0.7748, + "step": 2417, + "time": 13.21 + }, + { + "epoch": 2.32, + "learning_rate": "1.6470e-04", + "loss": 0.6568, + "slid_loss": 0.7735, + "step": 2418, + "time": 12.92 + }, + { + "epoch": 2.32, + "learning_rate": "1.6467e-04", + "loss": 0.8376, + "slid_loss": 0.7734, + "step": 2419, + "time": 12.87 + }, + { + "epoch": 2.32, + "learning_rate": "1.6464e-04", + "loss": 0.7695, + 
"slid_loss": 0.7726, + "step": 2420, + "time": 12.99 + }, + { + "epoch": 2.33, + "learning_rate": "1.6461e-04", + "loss": 0.6859, + "slid_loss": 0.7718, + "step": 2421, + "time": 12.27 + }, + { + "epoch": 2.33, + "learning_rate": "1.6457e-04", + "loss": 0.7864, + "slid_loss": 0.7722, + "step": 2422, + "time": 13.59 + }, + { + "epoch": 2.33, + "learning_rate": "1.6454e-04", + "loss": 0.7615, + "slid_loss": 0.7717, + "step": 2423, + "time": 13.63 + }, + { + "epoch": 2.33, + "learning_rate": "1.6451e-04", + "loss": 0.6796, + "slid_loss": 0.7713, + "step": 2424, + "time": 13.85 + }, + { + "epoch": 2.33, + "learning_rate": "1.6448e-04", + "loss": 0.7267, + "slid_loss": 0.7708, + "step": 2425, + "time": 13.24 + }, + { + "epoch": 2.33, + "learning_rate": "1.6445e-04", + "loss": 0.6368, + "slid_loss": 0.7696, + "step": 2426, + "time": 13.12 + }, + { + "epoch": 2.33, + "learning_rate": "1.6441e-04", + "loss": 0.728, + "slid_loss": 0.7683, + "step": 2427, + "time": 13.27 + }, + { + "epoch": 2.33, + "learning_rate": "1.6438e-04", + "loss": 0.8599, + "slid_loss": 0.7692, + "step": 2428, + "time": 13.34 + }, + { + "epoch": 2.33, + "learning_rate": "1.6435e-04", + "loss": 0.7962, + "slid_loss": 0.7687, + "step": 2429, + "time": 13.97 + }, + { + "epoch": 2.33, + "learning_rate": "1.6432e-04", + "loss": 0.7078, + "slid_loss": 0.7683, + "step": 2430, + "time": 12.85 + }, + { + "epoch": 2.34, + "learning_rate": "1.6429e-04", + "loss": 0.7533, + "slid_loss": 0.7694, + "step": 2431, + "time": 13.73 + }, + { + "epoch": 2.34, + "learning_rate": "1.6425e-04", + "loss": 0.7104, + "slid_loss": 0.7688, + "step": 2432, + "time": 13.94 + }, + { + "epoch": 2.34, + "learning_rate": "1.6422e-04", + "loss": 0.7481, + "slid_loss": 0.7678, + "step": 2433, + "time": 13.45 + }, + { + "epoch": 2.34, + "learning_rate": "1.6419e-04", + "loss": 0.7471, + "slid_loss": 0.7676, + "step": 2434, + "time": 13.52 + }, + { + "epoch": 2.34, + "learning_rate": "1.6416e-04", + "loss": 0.7693, + "slid_loss": 0.7675, 
+ "step": 2435, + "time": 13.86 + }, + { + "epoch": 2.34, + "learning_rate": "1.6412e-04", + "loss": 0.843, + "slid_loss": 0.7687, + "step": 2436, + "time": 13.61 + }, + { + "epoch": 2.34, + "learning_rate": "1.6409e-04", + "loss": 0.776, + "slid_loss": 0.7684, + "step": 2437, + "time": 12.81 + }, + { + "epoch": 2.34, + "learning_rate": "1.6406e-04", + "loss": 0.8103, + "slid_loss": 0.7685, + "step": 2438, + "time": 13.75 + }, + { + "epoch": 2.34, + "learning_rate": "1.6403e-04", + "loss": 0.823, + "slid_loss": 0.7691, + "step": 2439, + "time": 13.67 + }, + { + "epoch": 2.34, + "learning_rate": "1.6400e-04", + "loss": 0.7896, + "slid_loss": 0.7692, + "step": 2440, + "time": 12.99 + }, + { + "epoch": 2.34, + "learning_rate": "1.6396e-04", + "loss": 0.8034, + "slid_loss": 0.77, + "step": 2441, + "time": 13.27 + }, + { + "epoch": 2.35, + "learning_rate": "1.6393e-04", + "loss": 0.7983, + "slid_loss": 0.7695, + "step": 2442, + "time": 13.22 + }, + { + "epoch": 2.35, + "learning_rate": "1.6390e-04", + "loss": 0.7707, + "slid_loss": 0.77, + "step": 2443, + "time": 14.22 + }, + { + "epoch": 2.35, + "learning_rate": "1.6387e-04", + "loss": 0.7461, + "slid_loss": 0.7704, + "step": 2444, + "time": 11.59 + }, + { + "epoch": 2.35, + "learning_rate": "1.6383e-04", + "loss": 0.7276, + "slid_loss": 0.7693, + "step": 2445, + "time": 11.76 + }, + { + "epoch": 2.35, + "learning_rate": "1.6380e-04", + "loss": 0.7686, + "slid_loss": 0.7689, + "step": 2446, + "time": 14.17 + }, + { + "epoch": 2.35, + "learning_rate": "1.6377e-04", + "loss": 0.8094, + "slid_loss": 0.7691, + "step": 2447, + "time": 13.4 + }, + { + "epoch": 2.35, + "learning_rate": "1.6374e-04", + "loss": 0.8325, + "slid_loss": 0.7697, + "step": 2448, + "time": 13.94 + }, + { + "epoch": 2.35, + "learning_rate": "1.6371e-04", + "loss": 0.8013, + "slid_loss": 0.769, + "step": 2449, + "time": 13.46 + }, + { + "epoch": 2.35, + "learning_rate": "1.6367e-04", + "loss": 0.7921, + "slid_loss": 0.7694, + "step": 2450, + "time": 
12.91 + }, + { + "epoch": 2.35, + "learning_rate": "1.6364e-04", + "loss": 0.8464, + "slid_loss": 0.7699, + "step": 2451, + "time": 12.76 + }, + { + "epoch": 2.36, + "learning_rate": "1.6361e-04", + "loss": 0.7586, + "slid_loss": 0.7695, + "step": 2452, + "time": 12.79 + }, + { + "epoch": 2.36, + "learning_rate": "1.6358e-04", + "loss": 0.7911, + "slid_loss": 0.7697, + "step": 2453, + "time": 13.54 + }, + { + "epoch": 2.36, + "learning_rate": "1.6354e-04", + "loss": 0.7466, + "slid_loss": 0.7695, + "step": 2454, + "time": 13.21 + }, + { + "epoch": 2.36, + "learning_rate": "1.6351e-04", + "loss": 0.773, + "slid_loss": 0.7699, + "step": 2455, + "time": 11.56 + }, + { + "epoch": 2.36, + "learning_rate": "1.6348e-04", + "loss": 0.7781, + "slid_loss": 0.7689, + "step": 2456, + "time": 11.76 + }, + { + "epoch": 2.36, + "learning_rate": "1.6345e-04", + "loss": 0.718, + "slid_loss": 0.768, + "step": 2457, + "time": 12.81 + }, + { + "epoch": 2.36, + "learning_rate": "1.6342e-04", + "loss": 0.7654, + "slid_loss": 0.7685, + "step": 2458, + "time": 14.09 + }, + { + "epoch": 2.36, + "learning_rate": "1.6338e-04", + "loss": 0.7935, + "slid_loss": 0.7682, + "step": 2459, + "time": 11.66 + }, + { + "epoch": 2.36, + "learning_rate": "1.6335e-04", + "loss": 0.7984, + "slid_loss": 0.7687, + "step": 2460, + "time": 13.9 + }, + { + "epoch": 2.36, + "learning_rate": "1.6332e-04", + "loss": 0.9206, + "slid_loss": 0.7696, + "step": 2461, + "time": 12.22 + }, + { + "epoch": 2.37, + "learning_rate": "1.6329e-04", + "loss": 0.7754, + "slid_loss": 0.77, + "step": 2462, + "time": 11.66 + }, + { + "epoch": 2.37, + "learning_rate": "1.6325e-04", + "loss": 0.6438, + "slid_loss": 0.7688, + "step": 2463, + "time": 12.09 + }, + { + "epoch": 2.37, + "learning_rate": "1.6322e-04", + "loss": 0.7966, + "slid_loss": 0.7692, + "step": 2464, + "time": 13.54 + }, + { + "epoch": 2.37, + "learning_rate": "1.6319e-04", + "loss": 0.7955, + "slid_loss": 0.7684, + "step": 2465, + "time": 13.7 + }, + { + "epoch": 
2.37, + "learning_rate": "1.6316e-04", + "loss": 0.7837, + "slid_loss": 0.7683, + "step": 2466, + "time": 13.58 + }, + { + "epoch": 2.37, + "learning_rate": "1.6312e-04", + "loss": 0.7989, + "slid_loss": 0.7691, + "step": 2467, + "time": 11.4 + }, + { + "epoch": 2.37, + "learning_rate": "1.6309e-04", + "loss": 0.6558, + "slid_loss": 0.7683, + "step": 2468, + "time": 13.77 + }, + { + "epoch": 2.37, + "learning_rate": "1.6306e-04", + "loss": 0.7811, + "slid_loss": 0.768, + "step": 2469, + "time": 11.42 + }, + { + "epoch": 2.37, + "learning_rate": "1.6303e-04", + "loss": 0.8034, + "slid_loss": 0.7685, + "step": 2470, + "time": 12.5 + }, + { + "epoch": 2.37, + "learning_rate": "1.6299e-04", + "loss": 0.8127, + "slid_loss": 0.7689, + "step": 2471, + "time": 11.92 + }, + { + "epoch": 2.37, + "learning_rate": "1.6296e-04", + "loss": 0.7403, + "slid_loss": 0.7677, + "step": 2472, + "time": 13.83 + }, + { + "epoch": 2.38, + "learning_rate": "1.6293e-04", + "loss": 0.6105, + "slid_loss": 0.7673, + "step": 2473, + "time": 12.73 + }, + { + "epoch": 2.38, + "learning_rate": "1.6290e-04", + "loss": 0.8256, + "slid_loss": 0.769, + "step": 2474, + "time": 14.34 + }, + { + "epoch": 2.38, + "learning_rate": "1.6287e-04", + "loss": 0.7896, + "slid_loss": 0.769, + "step": 2475, + "time": 13.15 + }, + { + "epoch": 2.38, + "learning_rate": "1.6283e-04", + "loss": 0.7023, + "slid_loss": 0.7688, + "step": 2476, + "time": 13.3 + }, + { + "epoch": 2.38, + "learning_rate": "1.6280e-04", + "loss": 0.7481, + "slid_loss": 0.7693, + "step": 2477, + "time": 13.23 + }, + { + "epoch": 2.38, + "learning_rate": "1.6277e-04", + "loss": 0.7926, + "slid_loss": 0.7706, + "step": 2478, + "time": 13.0 + }, + { + "epoch": 2.38, + "learning_rate": "1.6274e-04", + "loss": 0.6504, + "slid_loss": 0.77, + "step": 2479, + "time": 12.52 + }, + { + "epoch": 2.38, + "learning_rate": "1.6270e-04", + "loss": 0.7893, + "slid_loss": 0.7701, + "step": 2480, + "time": 13.77 + }, + { + "epoch": 2.38, + "learning_rate": 
"1.6267e-04", + "loss": 0.8186, + "slid_loss": 0.771, + "step": 2481, + "time": 13.03 + }, + { + "epoch": 2.38, + "learning_rate": "1.6264e-04", + "loss": 0.893, + "slid_loss": 0.7719, + "step": 2482, + "time": 13.02 + }, + { + "epoch": 2.39, + "learning_rate": "1.6261e-04", + "loss": 0.8132, + "slid_loss": 0.7716, + "step": 2483, + "time": 12.39 + }, + { + "epoch": 2.39, + "learning_rate": "1.6257e-04", + "loss": 0.776, + "slid_loss": 0.772, + "step": 2484, + "time": 13.84 + }, + { + "epoch": 2.39, + "learning_rate": "1.6254e-04", + "loss": 0.802, + "slid_loss": 0.7721, + "step": 2485, + "time": 13.25 + }, + { + "epoch": 2.39, + "learning_rate": "1.6251e-04", + "loss": 0.7807, + "slid_loss": 0.7715, + "step": 2486, + "time": 12.99 + }, + { + "epoch": 2.39, + "learning_rate": "1.6248e-04", + "loss": 0.7731, + "slid_loss": 0.7714, + "step": 2487, + "time": 11.42 + }, + { + "epoch": 2.39, + "learning_rate": "1.6244e-04", + "loss": 0.846, + "slid_loss": 0.773, + "step": 2488, + "time": 13.81 + }, + { + "epoch": 2.39, + "learning_rate": "1.6241e-04", + "loss": 0.7734, + "slid_loss": 0.7734, + "step": 2489, + "time": 11.75 + }, + { + "epoch": 2.39, + "learning_rate": "1.6238e-04", + "loss": 0.712, + "slid_loss": 0.7734, + "step": 2490, + "time": 13.24 + }, + { + "epoch": 2.39, + "learning_rate": "1.6235e-04", + "loss": 0.8626, + "slid_loss": 0.7745, + "step": 2491, + "time": 12.19 + }, + { + "epoch": 2.39, + "learning_rate": "1.6231e-04", + "loss": 0.7459, + "slid_loss": 0.7727, + "step": 2492, + "time": 12.83 + }, + { + "epoch": 2.39, + "learning_rate": "1.6228e-04", + "loss": 0.7971, + "slid_loss": 0.7726, + "step": 2493, + "time": 13.2 + }, + { + "epoch": 2.4, + "learning_rate": "1.6225e-04", + "loss": 0.7954, + "slid_loss": 0.7728, + "step": 2494, + "time": 12.89 + }, + { + "epoch": 2.4, + "learning_rate": "1.6222e-04", + "loss": 0.7523, + "slid_loss": 0.7734, + "step": 2495, + "time": 11.43 + }, + { + "epoch": 2.4, + "learning_rate": "1.6218e-04", + "loss": 0.7914, 
+ "slid_loss": 0.7733, + "step": 2496, + "time": 13.32 + }, + { + "epoch": 2.4, + "learning_rate": "1.6215e-04", + "loss": 0.7158, + "slid_loss": 0.7726, + "step": 2497, + "time": 13.42 + }, + { + "epoch": 2.4, + "learning_rate": "1.6212e-04", + "loss": 0.8404, + "slid_loss": 0.7735, + "step": 2498, + "time": 12.83 + }, + { + "epoch": 2.4, + "learning_rate": "1.6209e-04", + "loss": 0.8212, + "slid_loss": 0.7742, + "step": 2499, + "time": 13.79 + }, + { + "epoch": 2.4, + "learning_rate": "1.6205e-04", + "loss": 0.8013, + "slid_loss": 0.7738, + "step": 2500, + "time": 12.94 + }, + { + "epoch": 2.4, + "learning_rate": "1.6202e-04", + "loss": 0.7077, + "slid_loss": 0.7727, + "step": 2501, + "time": 13.25 + }, + { + "epoch": 2.4, + "learning_rate": "1.6199e-04", + "loss": 0.7785, + "slid_loss": 0.7731, + "step": 2502, + "time": 11.4 + }, + { + "epoch": 2.4, + "learning_rate": "1.6196e-04", + "loss": 0.7267, + "slid_loss": 0.7734, + "step": 2503, + "time": 13.52 + }, + { + "epoch": 2.41, + "learning_rate": "1.6192e-04", + "loss": 0.8219, + "slid_loss": 0.7734, + "step": 2504, + "time": 11.8 + }, + { + "epoch": 2.41, + "learning_rate": "1.6189e-04", + "loss": 0.6668, + "slid_loss": 0.7719, + "step": 2505, + "time": 13.76 + }, + { + "epoch": 2.41, + "learning_rate": "1.6186e-04", + "loss": 0.7414, + "slid_loss": 0.7719, + "step": 2506, + "time": 13.7 + }, + { + "epoch": 2.41, + "learning_rate": "1.6183e-04", + "loss": 0.6569, + "slid_loss": 0.7703, + "step": 2507, + "time": 13.13 + }, + { + "epoch": 2.41, + "learning_rate": "1.6179e-04", + "loss": 0.7284, + "slid_loss": 0.7697, + "step": 2508, + "time": 13.0 + }, + { + "epoch": 2.41, + "learning_rate": "1.6176e-04", + "loss": 0.7826, + "slid_loss": 0.7696, + "step": 2509, + "time": 13.34 + }, + { + "epoch": 2.41, + "learning_rate": "1.6173e-04", + "loss": 0.7546, + "slid_loss": 0.7694, + "step": 2510, + "time": 13.78 + }, + { + "epoch": 2.41, + "learning_rate": "1.6170e-04", + "loss": 0.7023, + "slid_loss": 0.7684, + 
"step": 2511, + "time": 13.73 + }, + { + "epoch": 2.41, + "learning_rate": "1.6166e-04", + "loss": 0.776, + "slid_loss": 0.7685, + "step": 2512, + "time": 13.21 + }, + { + "epoch": 2.41, + "learning_rate": "1.6163e-04", + "loss": 0.6859, + "slid_loss": 0.7675, + "step": 2513, + "time": 13.17 + }, + { + "epoch": 2.41, + "learning_rate": "1.6160e-04", + "loss": 0.7071, + "slid_loss": 0.7669, + "step": 2514, + "time": 12.72 + }, + { + "epoch": 2.42, + "learning_rate": "1.6156e-04", + "loss": 0.7482, + "slid_loss": 0.7675, + "step": 2515, + "time": 13.44 + }, + { + "epoch": 2.42, + "learning_rate": "1.6153e-04", + "loss": 0.6813, + "slid_loss": 0.7666, + "step": 2516, + "time": 13.6 + }, + { + "epoch": 2.42, + "learning_rate": "1.6150e-04", + "loss": 0.7776, + "slid_loss": 0.7659, + "step": 2517, + "time": 14.03 + }, + { + "epoch": 2.42, + "learning_rate": "1.6147e-04", + "loss": 0.7137, + "slid_loss": 0.7665, + "step": 2518, + "time": 12.07 + }, + { + "epoch": 2.42, + "learning_rate": "1.6143e-04", + "loss": 0.7073, + "slid_loss": 0.7652, + "step": 2519, + "time": 13.14 + }, + { + "epoch": 2.42, + "learning_rate": "1.6140e-04", + "loss": 0.7791, + "slid_loss": 0.7653, + "step": 2520, + "time": 13.7 + }, + { + "epoch": 2.42, + "learning_rate": "1.6137e-04", + "loss": 0.7411, + "slid_loss": 0.7658, + "step": 2521, + "time": 13.34 + }, + { + "epoch": 2.42, + "learning_rate": "1.6134e-04", + "loss": 0.793, + "slid_loss": 0.7659, + "step": 2522, + "time": 13.55 + }, + { + "epoch": 2.42, + "learning_rate": "1.6130e-04", + "loss": 0.805, + "slid_loss": 0.7663, + "step": 2523, + "time": 13.16 + }, + { + "epoch": 2.42, + "learning_rate": "1.6127e-04", + "loss": 0.726, + "slid_loss": 0.7668, + "step": 2524, + "time": 12.54 + }, + { + "epoch": 2.43, + "learning_rate": "1.6124e-04", + "loss": 0.7142, + "slid_loss": 0.7667, + "step": 2525, + "time": 13.24 + }, + { + "epoch": 2.43, + "learning_rate": "1.6121e-04", + "loss": 0.7898, + "slid_loss": 0.7682, + "step": 2526, + "time": 
12.49 + }, + { + "epoch": 2.43, + "learning_rate": "1.6117e-04", + "loss": 0.8513, + "slid_loss": 0.7694, + "step": 2527, + "time": 12.47 + }, + { + "epoch": 2.43, + "learning_rate": "1.6114e-04", + "loss": 0.7983, + "slid_loss": 0.7688, + "step": 2528, + "time": 11.48 + }, + { + "epoch": 2.43, + "learning_rate": "1.6111e-04", + "loss": 0.7522, + "slid_loss": 0.7684, + "step": 2529, + "time": 13.78 + }, + { + "epoch": 2.43, + "learning_rate": "1.6107e-04", + "loss": 0.7608, + "slid_loss": 0.7689, + "step": 2530, + "time": 11.59 + }, + { + "epoch": 2.43, + "learning_rate": "1.6104e-04", + "loss": 0.7395, + "slid_loss": 0.7688, + "step": 2531, + "time": 13.38 + }, + { + "epoch": 2.43, + "learning_rate": "1.6101e-04", + "loss": 0.6857, + "slid_loss": 0.7685, + "step": 2532, + "time": 13.33 + }, + { + "epoch": 2.43, + "learning_rate": "1.6098e-04", + "loss": 0.7653, + "slid_loss": 0.7687, + "step": 2533, + "time": 13.58 + }, + { + "epoch": 2.43, + "learning_rate": "1.6094e-04", + "loss": 0.7789, + "slid_loss": 0.769, + "step": 2534, + "time": 11.68 + }, + { + "epoch": 2.44, + "learning_rate": "1.6091e-04", + "loss": 0.7356, + "slid_loss": 0.7687, + "step": 2535, + "time": 12.18 + }, + { + "epoch": 2.44, + "learning_rate": "1.6088e-04", + "loss": 0.8095, + "slid_loss": 0.7684, + "step": 2536, + "time": 13.32 + }, + { + "epoch": 2.44, + "learning_rate": "1.6085e-04", + "loss": 0.7227, + "slid_loss": 0.7678, + "step": 2537, + "time": 13.75 + }, + { + "epoch": 2.44, + "learning_rate": "1.6081e-04", + "loss": 0.788, + "slid_loss": 0.7676, + "step": 2538, + "time": 13.2 + }, + { + "epoch": 2.44, + "learning_rate": "1.6078e-04", + "loss": 0.8287, + "slid_loss": 0.7677, + "step": 2539, + "time": 11.58 + }, + { + "epoch": 2.44, + "learning_rate": "1.6075e-04", + "loss": 0.7958, + "slid_loss": 0.7677, + "step": 2540, + "time": 13.51 + }, + { + "epoch": 2.44, + "learning_rate": "1.6071e-04", + "loss": 0.7521, + "slid_loss": 0.7672, + "step": 2541, + "time": 12.79 + }, + { + 
"epoch": 2.44, + "learning_rate": "1.6068e-04", + "loss": 0.7179, + "slid_loss": 0.7664, + "step": 2542, + "time": 13.41 + }, + { + "epoch": 2.44, + "learning_rate": "1.6065e-04", + "loss": 0.7349, + "slid_loss": 0.766, + "step": 2543, + "time": 13.47 + }, + { + "epoch": 2.44, + "learning_rate": "1.6062e-04", + "loss": 0.7754, + "slid_loss": 0.7663, + "step": 2544, + "time": 13.83 + }, + { + "epoch": 2.44, + "learning_rate": "1.6058e-04", + "loss": 0.8546, + "slid_loss": 0.7676, + "step": 2545, + "time": 11.85 + }, + { + "epoch": 2.45, + "learning_rate": "1.6055e-04", + "loss": 0.802, + "slid_loss": 0.7679, + "step": 2546, + "time": 13.27 + }, + { + "epoch": 2.45, + "learning_rate": "1.6052e-04", + "loss": 0.8564, + "slid_loss": 0.7684, + "step": 2547, + "time": 13.3 + }, + { + "epoch": 2.45, + "learning_rate": "1.6049e-04", + "loss": 0.8153, + "slid_loss": 0.7682, + "step": 2548, + "time": 12.88 + }, + { + "epoch": 2.45, + "learning_rate": "1.6045e-04", + "loss": 0.7352, + "slid_loss": 0.7676, + "step": 2549, + "time": 12.95 + }, + { + "epoch": 2.45, + "learning_rate": "1.6042e-04", + "loss": 0.6608, + "slid_loss": 0.7663, + "step": 2550, + "time": 13.3 + }, + { + "epoch": 2.45, + "learning_rate": "1.6039e-04", + "loss": 0.7716, + "slid_loss": 0.7655, + "step": 2551, + "time": 12.16 + }, + { + "epoch": 2.45, + "learning_rate": "1.6035e-04", + "loss": 0.7292, + "slid_loss": 0.7652, + "step": 2552, + "time": 12.29 + }, + { + "epoch": 2.45, + "learning_rate": "1.6032e-04", + "loss": 0.7519, + "slid_loss": 0.7648, + "step": 2553, + "time": 11.67 + }, + { + "epoch": 2.45, + "learning_rate": "1.6029e-04", + "loss": 0.7953, + "slid_loss": 0.7653, + "step": 2554, + "time": 14.36 + }, + { + "epoch": 2.45, + "learning_rate": "1.6026e-04", + "loss": 0.7437, + "slid_loss": 0.765, + "step": 2555, + "time": 13.53 + }, + { + "epoch": 2.46, + "learning_rate": "1.6022e-04", + "loss": 0.6395, + "slid_loss": 0.7636, + "step": 2556, + "time": 12.01 + }, + { + "epoch": 2.46, + 
"learning_rate": "1.6019e-04", + "loss": 0.744, + "slid_loss": 0.7639, + "step": 2557, + "time": 13.27 + }, + { + "epoch": 2.46, + "learning_rate": "1.6016e-04", + "loss": 0.7535, + "slid_loss": 0.7638, + "step": 2558, + "time": 11.39 + }, + { + "epoch": 2.46, + "learning_rate": "1.6012e-04", + "loss": 0.6509, + "slid_loss": 0.7624, + "step": 2559, + "time": 12.11 + }, + { + "epoch": 2.46, + "learning_rate": "1.6009e-04", + "loss": 0.8245, + "slid_loss": 0.7626, + "step": 2560, + "time": 14.74 + }, + { + "epoch": 2.46, + "learning_rate": "1.6006e-04", + "loss": 0.7419, + "slid_loss": 0.7608, + "step": 2561, + "time": 13.9 + }, + { + "epoch": 2.46, + "learning_rate": "1.6003e-04", + "loss": 0.7167, + "slid_loss": 0.7602, + "step": 2562, + "time": 11.71 + }, + { + "epoch": 2.46, + "learning_rate": "1.5999e-04", + "loss": 0.7835, + "slid_loss": 0.7616, + "step": 2563, + "time": 13.64 + }, + { + "epoch": 2.46, + "learning_rate": "1.5996e-04", + "loss": 0.8066, + "slid_loss": 0.7617, + "step": 2564, + "time": 13.08 + }, + { + "epoch": 2.46, + "learning_rate": "1.5993e-04", + "loss": 0.6595, + "slid_loss": 0.7604, + "step": 2565, + "time": 13.75 + }, + { + "epoch": 2.46, + "learning_rate": "1.5989e-04", + "loss": 0.7741, + "slid_loss": 0.7603, + "step": 2566, + "time": 13.6 + }, + { + "epoch": 2.47, + "learning_rate": "1.5986e-04", + "loss": 0.7507, + "slid_loss": 0.7598, + "step": 2567, + "time": 13.95 + }, + { + "epoch": 2.47, + "learning_rate": "1.5983e-04", + "loss": 0.7065, + "slid_loss": 0.7603, + "step": 2568, + "time": 12.95 + }, + { + "epoch": 2.47, + "learning_rate": "1.5980e-04", + "loss": 0.8069, + "slid_loss": 0.7606, + "step": 2569, + "time": 13.77 + }, + { + "epoch": 2.47, + "learning_rate": "1.5976e-04", + "loss": 0.717, + "slid_loss": 0.7597, + "step": 2570, + "time": 14.05 + }, + { + "epoch": 2.47, + "learning_rate": "1.5973e-04", + "loss": 0.7404, + "slid_loss": 0.759, + "step": 2571, + "time": 14.09 + }, + { + "epoch": 2.47, + "learning_rate": 
"1.5970e-04", + "loss": 0.81, + "slid_loss": 0.7597, + "step": 2572, + "time": 14.25 + }, + { + "epoch": 2.47, + "learning_rate": "1.5966e-04", + "loss": 0.816, + "slid_loss": 0.7617, + "step": 2573, + "time": 14.41 + }, + { + "epoch": 2.47, + "learning_rate": "1.5963e-04", + "loss": 0.7489, + "slid_loss": 0.761, + "step": 2574, + "time": 13.61 + }, + { + "epoch": 2.47, + "learning_rate": "1.5960e-04", + "loss": 0.7789, + "slid_loss": 0.7609, + "step": 2575, + "time": 13.1 + }, + { + "epoch": 2.47, + "learning_rate": "1.5957e-04", + "loss": 0.7427, + "slid_loss": 0.7613, + "step": 2576, + "time": 14.12 + }, + { + "epoch": 2.48, + "learning_rate": "1.5953e-04", + "loss": 0.7926, + "slid_loss": 0.7617, + "step": 2577, + "time": 13.63 + }, + { + "epoch": 2.48, + "learning_rate": "1.5950e-04", + "loss": 0.7035, + "slid_loss": 0.7608, + "step": 2578, + "time": 11.24 + }, + { + "epoch": 2.48, + "learning_rate": "1.5947e-04", + "loss": 0.8069, + "slid_loss": 0.7624, + "step": 2579, + "time": 12.09 + }, + { + "epoch": 2.48, + "learning_rate": "1.5943e-04", + "loss": 0.6796, + "slid_loss": 0.7613, + "step": 2580, + "time": 13.26 + }, + { + "epoch": 2.48, + "learning_rate": "1.5940e-04", + "loss": 0.7261, + "slid_loss": 0.7604, + "step": 2581, + "time": 14.15 + }, + { + "epoch": 2.48, + "learning_rate": "1.5937e-04", + "loss": 0.8672, + "slid_loss": 0.7601, + "step": 2582, + "time": 13.69 + }, + { + "epoch": 2.48, + "learning_rate": "1.5933e-04", + "loss": 0.7342, + "slid_loss": 0.7593, + "step": 2583, + "time": 11.96 + }, + { + "epoch": 2.48, + "learning_rate": "1.5930e-04", + "loss": 0.7754, + "slid_loss": 0.7593, + "step": 2584, + "time": 13.56 + }, + { + "epoch": 2.48, + "learning_rate": "1.5927e-04", + "loss": 0.7658, + "slid_loss": 0.7589, + "step": 2585, + "time": 13.11 + }, + { + "epoch": 2.48, + "learning_rate": "1.5924e-04", + "loss": 0.7196, + "slid_loss": 0.7583, + "step": 2586, + "time": 12.28 + }, + { + "epoch": 2.49, + "learning_rate": "1.5920e-04", + "loss": 
0.7761, + "slid_loss": 0.7584, + "step": 2587, + "time": 11.17 + }, + { + "epoch": 2.49, + "learning_rate": "1.5917e-04", + "loss": 0.7566, + "slid_loss": 0.7575, + "step": 2588, + "time": 14.45 + }, + { + "epoch": 2.49, + "learning_rate": "1.5914e-04", + "loss": 0.6523, + "slid_loss": 0.7563, + "step": 2589, + "time": 12.95 + }, + { + "epoch": 2.49, + "learning_rate": "1.5910e-04", + "loss": 0.7621, + "slid_loss": 0.7568, + "step": 2590, + "time": 12.92 + }, + { + "epoch": 2.49, + "learning_rate": "1.5907e-04", + "loss": 0.8299, + "slid_loss": 0.7564, + "step": 2591, + "time": 13.4 + }, + { + "epoch": 2.49, + "learning_rate": "1.5904e-04", + "loss": 0.7154, + "slid_loss": 0.7561, + "step": 2592, + "time": 14.56 + }, + { + "epoch": 2.49, + "learning_rate": "1.5901e-04", + "loss": 0.7038, + "slid_loss": 0.7552, + "step": 2593, + "time": 13.6 + }, + { + "epoch": 2.49, + "learning_rate": "1.5897e-04", + "loss": 0.752, + "slid_loss": 0.7548, + "step": 2594, + "time": 12.77 + }, + { + "epoch": 2.49, + "learning_rate": "1.5894e-04", + "loss": 0.7786, + "slid_loss": 0.755, + "step": 2595, + "time": 13.68 + }, + { + "epoch": 2.49, + "learning_rate": "1.5891e-04", + "loss": 0.7727, + "slid_loss": 0.7548, + "step": 2596, + "time": 11.73 + }, + { + "epoch": 2.49, + "learning_rate": "1.5887e-04", + "loss": 0.7248, + "slid_loss": 0.7549, + "step": 2597, + "time": 13.75 + }, + { + "epoch": 2.5, + "learning_rate": "1.5884e-04", + "loss": 0.7293, + "slid_loss": 0.7538, + "step": 2598, + "time": 13.57 + }, + { + "epoch": 2.5, + "learning_rate": "1.5881e-04", + "loss": 0.7143, + "slid_loss": 0.7527, + "step": 2599, + "time": 13.44 + }, + { + "epoch": 2.5, + "learning_rate": "1.5877e-04", + "loss": 0.8009, + "slid_loss": 0.7527, + "step": 2600, + "time": 12.79 + }, + { + "epoch": 2.5, + "learning_rate": "1.5874e-04", + "loss": 0.7705, + "slid_loss": 0.7534, + "step": 2601, + "time": 12.02 + }, + { + "epoch": 2.5, + "learning_rate": "1.5871e-04", + "loss": 0.7312, + "slid_loss": 
0.7529, + "step": 2602, + "time": 12.34 + }, + { + "epoch": 2.5, + "learning_rate": "1.5868e-04", + "loss": 0.7906, + "slid_loss": 0.7535, + "step": 2603, + "time": 13.32 + }, + { + "epoch": 2.5, + "learning_rate": "1.5864e-04", + "loss": 0.7094, + "slid_loss": 0.7524, + "step": 2604, + "time": 12.95 + }, + { + "epoch": 2.5, + "learning_rate": "1.5861e-04", + "loss": 0.8538, + "slid_loss": 0.7543, + "step": 2605, + "time": 12.99 + }, + { + "epoch": 2.5, + "learning_rate": "1.5858e-04", + "loss": 0.7116, + "slid_loss": 0.754, + "step": 2606, + "time": 11.37 + }, + { + "epoch": 2.5, + "learning_rate": "1.5854e-04", + "loss": 0.7694, + "slid_loss": 0.7551, + "step": 2607, + "time": 12.83 + }, + { + "epoch": 2.51, + "learning_rate": "1.5851e-04", + "loss": 0.765, + "slid_loss": 0.7555, + "step": 2608, + "time": 12.34 + }, + { + "epoch": 2.51, + "learning_rate": "1.5848e-04", + "loss": 0.7744, + "slid_loss": 0.7554, + "step": 2609, + "time": 11.05 + }, + { + "epoch": 2.51, + "learning_rate": "1.5844e-04", + "loss": 0.7855, + "slid_loss": 0.7557, + "step": 2610, + "time": 12.87 + }, + { + "epoch": 2.51, + "learning_rate": "1.5841e-04", + "loss": 0.6962, + "slid_loss": 0.7556, + "step": 2611, + "time": 12.62 + }, + { + "epoch": 2.51, + "learning_rate": "1.5838e-04", + "loss": 0.7646, + "slid_loss": 0.7555, + "step": 2612, + "time": 13.6 + }, + { + "epoch": 2.51, + "learning_rate": "1.5834e-04", + "loss": 0.8084, + "slid_loss": 0.7568, + "step": 2613, + "time": 13.52 + }, + { + "epoch": 2.51, + "learning_rate": "1.5831e-04", + "loss": 0.7533, + "slid_loss": 0.7572, + "step": 2614, + "time": 13.19 + }, + { + "epoch": 2.51, + "learning_rate": "1.5828e-04", + "loss": 0.7667, + "slid_loss": 0.7574, + "step": 2615, + "time": 13.25 + }, + { + "epoch": 2.51, + "learning_rate": "1.5825e-04", + "loss": 0.6648, + "slid_loss": 0.7572, + "step": 2616, + "time": 12.96 + }, + { + "epoch": 2.51, + "learning_rate": "1.5821e-04", + "loss": 0.6563, + "slid_loss": 0.756, + "step": 2617, + 
"time": 12.97 + }, + { + "epoch": 2.51, + "learning_rate": "1.5818e-04", + "loss": 0.6764, + "slid_loss": 0.7557, + "step": 2618, + "time": 11.88 + }, + { + "epoch": 2.52, + "learning_rate": "1.5815e-04", + "loss": 0.798, + "slid_loss": 0.7566, + "step": 2619, + "time": 13.25 + }, + { + "epoch": 2.52, + "learning_rate": "1.5811e-04", + "loss": 0.8125, + "slid_loss": 0.7569, + "step": 2620, + "time": 13.33 + }, + { + "epoch": 2.52, + "learning_rate": "1.5808e-04", + "loss": 0.8022, + "slid_loss": 0.7575, + "step": 2621, + "time": 13.69 + }, + { + "epoch": 2.52, + "learning_rate": "1.5805e-04", + "loss": 0.7877, + "slid_loss": 0.7574, + "step": 2622, + "time": 13.84 + }, + { + "epoch": 2.52, + "learning_rate": "1.5801e-04", + "loss": 0.843, + "slid_loss": 0.7578, + "step": 2623, + "time": 12.85 + }, + { + "epoch": 2.52, + "learning_rate": "1.5798e-04", + "loss": 0.6315, + "slid_loss": 0.7569, + "step": 2624, + "time": 14.01 + }, + { + "epoch": 2.52, + "learning_rate": "1.5795e-04", + "loss": 0.734, + "slid_loss": 0.7571, + "step": 2625, + "time": 13.53 + }, + { + "epoch": 2.52, + "learning_rate": "1.5791e-04", + "loss": 0.7069, + "slid_loss": 0.7563, + "step": 2626, + "time": 12.75 + }, + { + "epoch": 2.52, + "learning_rate": "1.5788e-04", + "loss": 0.7965, + "slid_loss": 0.7557, + "step": 2627, + "time": 12.39 + }, + { + "epoch": 2.52, + "learning_rate": "1.5785e-04", + "loss": 0.6697, + "slid_loss": 0.7544, + "step": 2628, + "time": 13.87 + }, + { + "epoch": 2.53, + "learning_rate": "1.5782e-04", + "loss": 0.6838, + "slid_loss": 0.7537, + "step": 2629, + "time": 13.07 + }, + { + "epoch": 2.53, + "learning_rate": "1.5778e-04", + "loss": 0.8238, + "slid_loss": 0.7544, + "step": 2630, + "time": 13.42 + }, + { + "epoch": 2.53, + "learning_rate": "1.5775e-04", + "loss": 0.7702, + "slid_loss": 0.7547, + "step": 2631, + "time": 13.29 + }, + { + "epoch": 2.53, + "learning_rate": "1.5772e-04", + "loss": 0.8191, + "slid_loss": 0.756, + "step": 2632, + "time": 13.01 + }, + { 
+ "epoch": 2.53, + "learning_rate": "1.5768e-04", + "loss": 0.7865, + "slid_loss": 0.7562, + "step": 2633, + "time": 13.83 + }, + { + "epoch": 2.53, + "learning_rate": "1.5765e-04", + "loss": 0.7266, + "slid_loss": 0.7557, + "step": 2634, + "time": 11.02 + }, + { + "epoch": 2.53, + "learning_rate": "1.5762e-04", + "loss": 0.7764, + "slid_loss": 0.7561, + "step": 2635, + "time": 13.73 + }, + { + "epoch": 2.53, + "learning_rate": "1.5758e-04", + "loss": 0.7986, + "slid_loss": 0.756, + "step": 2636, + "time": 13.77 + }, + { + "epoch": 2.53, + "learning_rate": "1.5755e-04", + "loss": 0.6551, + "slid_loss": 0.7553, + "step": 2637, + "time": 13.48 + }, + { + "epoch": 2.53, + "learning_rate": "1.5752e-04", + "loss": 0.7846, + "slid_loss": 0.7553, + "step": 2638, + "time": 14.18 + }, + { + "epoch": 2.54, + "learning_rate": "1.5748e-04", + "loss": 0.6623, + "slid_loss": 0.7536, + "step": 2639, + "time": 13.57 + }, + { + "epoch": 2.54, + "learning_rate": "1.5745e-04", + "loss": 0.8719, + "slid_loss": 0.7544, + "step": 2640, + "time": 13.44 + }, + { + "epoch": 2.54, + "learning_rate": "1.5742e-04", + "loss": 0.7279, + "slid_loss": 0.7541, + "step": 2641, + "time": 13.36 + }, + { + "epoch": 2.54, + "learning_rate": "1.5738e-04", + "loss": 0.719, + "slid_loss": 0.7541, + "step": 2642, + "time": 13.82 + }, + { + "epoch": 2.54, + "learning_rate": "1.5735e-04", + "loss": 0.7999, + "slid_loss": 0.7548, + "step": 2643, + "time": 12.75 + }, + { + "epoch": 2.54, + "learning_rate": "1.5732e-04", + "loss": 0.7951, + "slid_loss": 0.755, + "step": 2644, + "time": 14.26 + }, + { + "epoch": 2.54, + "learning_rate": "1.5728e-04", + "loss": 0.737, + "slid_loss": 0.7538, + "step": 2645, + "time": 13.46 + }, + { + "epoch": 2.54, + "learning_rate": "1.5725e-04", + "loss": 0.7354, + "slid_loss": 0.7531, + "step": 2646, + "time": 12.83 + }, + { + "epoch": 2.54, + "learning_rate": "1.5722e-04", + "loss": 0.6958, + "slid_loss": 0.7515, + "step": 2647, + "time": 14.17 + }, + { + "epoch": 2.54, + 
"learning_rate": "1.5719e-04", + "loss": 0.8315, + "slid_loss": 0.7517, + "step": 2648, + "time": 11.62 + }, + { + "epoch": 2.54, + "learning_rate": "1.5715e-04", + "loss": 0.7353, + "slid_loss": 0.7517, + "step": 2649, + "time": 13.43 + }, + { + "epoch": 2.55, + "learning_rate": "1.5712e-04", + "loss": 0.7678, + "slid_loss": 0.7528, + "step": 2650, + "time": 13.96 + }, + { + "epoch": 2.55, + "learning_rate": "1.5709e-04", + "loss": 0.7072, + "slid_loss": 0.7521, + "step": 2651, + "time": 14.27 + }, + { + "epoch": 2.55, + "learning_rate": "1.5705e-04", + "loss": 0.7597, + "slid_loss": 0.7524, + "step": 2652, + "time": 12.73 + }, + { + "epoch": 2.55, + "learning_rate": "1.5702e-04", + "loss": 0.6534, + "slid_loss": 0.7515, + "step": 2653, + "time": 12.77 + }, + { + "epoch": 2.55, + "learning_rate": "1.5699e-04", + "loss": 0.7455, + "slid_loss": 0.751, + "step": 2654, + "time": 13.33 + }, + { + "epoch": 2.55, + "learning_rate": "1.5695e-04", + "loss": 0.7421, + "slid_loss": 0.7509, + "step": 2655, + "time": 12.66 + }, + { + "epoch": 2.55, + "learning_rate": "1.5692e-04", + "loss": 0.7313, + "slid_loss": 0.7519, + "step": 2656, + "time": 13.32 + }, + { + "epoch": 2.55, + "learning_rate": "1.5689e-04", + "loss": 0.6588, + "slid_loss": 0.751, + "step": 2657, + "time": 11.37 + }, + { + "epoch": 2.55, + "learning_rate": "1.5685e-04", + "loss": 0.7248, + "slid_loss": 0.7507, + "step": 2658, + "time": 12.93 + }, + { + "epoch": 2.55, + "learning_rate": "1.5682e-04", + "loss": 0.8126, + "slid_loss": 0.7523, + "step": 2659, + "time": 13.05 + }, + { + "epoch": 2.56, + "learning_rate": "1.5679e-04", + "loss": 0.5946, + "slid_loss": 0.75, + "step": 2660, + "time": 14.45 + }, + { + "epoch": 2.56, + "learning_rate": "1.5675e-04", + "loss": 0.7433, + "slid_loss": 0.75, + "step": 2661, + "time": 13.63 + }, + { + "epoch": 2.56, + "learning_rate": "1.5672e-04", + "loss": 0.7957, + "slid_loss": 0.7508, + "step": 2662, + "time": 13.54 + }, + { + "epoch": 2.56, + "learning_rate": 
"1.5669e-04", + "loss": 0.7429, + "slid_loss": 0.7504, + "step": 2663, + "time": 11.41 + }, + { + "epoch": 2.56, + "learning_rate": "1.5665e-04", + "loss": 0.7322, + "slid_loss": 0.7497, + "step": 2664, + "time": 13.21 + }, + { + "epoch": 2.56, + "learning_rate": "1.5662e-04", + "loss": 0.7457, + "slid_loss": 0.7505, + "step": 2665, + "time": 13.63 + }, + { + "epoch": 2.56, + "learning_rate": "1.5659e-04", + "loss": 0.74, + "slid_loss": 0.7502, + "step": 2666, + "time": 12.84 + }, + { + "epoch": 2.56, + "learning_rate": "1.5655e-04", + "loss": 0.8403, + "slid_loss": 0.7511, + "step": 2667, + "time": 13.0 + }, + { + "epoch": 2.56, + "learning_rate": "1.5652e-04", + "loss": 0.6667, + "slid_loss": 0.7507, + "step": 2668, + "time": 11.65 + }, + { + "epoch": 2.56, + "learning_rate": "1.5649e-04", + "loss": 0.7343, + "slid_loss": 0.75, + "step": 2669, + "time": 11.75 + }, + { + "epoch": 2.56, + "learning_rate": "1.5645e-04", + "loss": 0.7765, + "slid_loss": 0.7506, + "step": 2670, + "time": 12.88 + }, + { + "epoch": 2.57, + "learning_rate": "1.5642e-04", + "loss": 0.6627, + "slid_loss": 0.7498, + "step": 2671, + "time": 13.38 + }, + { + "epoch": 2.57, + "learning_rate": "1.5639e-04", + "loss": 0.7361, + "slid_loss": 0.7491, + "step": 2672, + "time": 12.75 + }, + { + "epoch": 2.57, + "learning_rate": "1.5635e-04", + "loss": 0.7962, + "slid_loss": 0.7489, + "step": 2673, + "time": 13.2 + }, + { + "epoch": 2.57, + "learning_rate": "1.5632e-04", + "loss": 0.7437, + "slid_loss": 0.7488, + "step": 2674, + "time": 12.32 + }, + { + "epoch": 2.57, + "learning_rate": "1.5629e-04", + "loss": 0.7281, + "slid_loss": 0.7483, + "step": 2675, + "time": 11.29 + }, + { + "epoch": 2.57, + "learning_rate": "1.5625e-04", + "loss": 0.8678, + "slid_loss": 0.7496, + "step": 2676, + "time": 13.77 + }, + { + "epoch": 2.57, + "learning_rate": "1.5622e-04", + "loss": 0.7707, + "slid_loss": 0.7493, + "step": 2677, + "time": 13.75 + }, + { + "epoch": 2.57, + "learning_rate": "1.5619e-04", + "loss": 
0.904, + "slid_loss": 0.7513, + "step": 2678, + "time": 13.39 + }, + { + "epoch": 2.57, + "learning_rate": "1.5615e-04", + "loss": 0.7357, + "slid_loss": 0.7506, + "step": 2679, + "time": 13.5 + }, + { + "epoch": 2.57, + "learning_rate": "1.5612e-04", + "loss": 0.686, + "slid_loss": 0.7507, + "step": 2680, + "time": 13.65 + }, + { + "epoch": 2.58, + "learning_rate": "1.5609e-04", + "loss": 0.7255, + "slid_loss": 0.7507, + "step": 2681, + "time": 13.27 + }, + { + "epoch": 2.58, + "learning_rate": "1.5606e-04", + "loss": 0.7308, + "slid_loss": 0.7493, + "step": 2682, + "time": 12.14 + }, + { + "epoch": 2.58, + "learning_rate": "1.5602e-04", + "loss": 0.7224, + "slid_loss": 0.7492, + "step": 2683, + "time": 13.42 + }, + { + "epoch": 2.58, + "learning_rate": "1.5599e-04", + "loss": 0.6316, + "slid_loss": 0.7478, + "step": 2684, + "time": 13.52 + }, + { + "epoch": 2.58, + "learning_rate": "1.5596e-04", + "loss": 0.677, + "slid_loss": 0.7469, + "step": 2685, + "time": 13.12 + }, + { + "epoch": 2.58, + "learning_rate": "1.5592e-04", + "loss": 0.8373, + "slid_loss": 0.7481, + "step": 2686, + "time": 12.17 + }, + { + "epoch": 2.58, + "learning_rate": "1.5589e-04", + "loss": 0.6994, + "slid_loss": 0.7473, + "step": 2687, + "time": 13.72 + }, + { + "epoch": 2.58, + "learning_rate": "1.5586e-04", + "loss": 0.6977, + "slid_loss": 0.7467, + "step": 2688, + "time": 12.97 + }, + { + "epoch": 2.58, + "learning_rate": "1.5582e-04", + "loss": 0.7113, + "slid_loss": 0.7473, + "step": 2689, + "time": 11.48 + }, + { + "epoch": 2.58, + "learning_rate": "1.5579e-04", + "loss": 0.677, + "slid_loss": 0.7464, + "step": 2690, + "time": 13.25 + }, + { + "epoch": 2.59, + "learning_rate": "1.5576e-04", + "loss": 0.7765, + "slid_loss": 0.7459, + "step": 2691, + "time": 13.79 + }, + { + "epoch": 2.59, + "learning_rate": "1.5572e-04", + "loss": 0.7437, + "slid_loss": 0.7462, + "step": 2692, + "time": 13.98 + }, + { + "epoch": 2.59, + "learning_rate": "1.5569e-04", + "loss": 0.7709, + "slid_loss": 
0.7469, + "step": 2693, + "time": 13.85 + }, + { + "epoch": 2.59, + "learning_rate": "1.5566e-04", + "loss": 0.6544, + "slid_loss": 0.7459, + "step": 2694, + "time": 11.57 + }, + { + "epoch": 2.59, + "learning_rate": "1.5562e-04", + "loss": 0.6891, + "slid_loss": 0.745, + "step": 2695, + "time": 12.94 + }, + { + "epoch": 2.59, + "learning_rate": "1.5559e-04", + "loss": 0.7832, + "slid_loss": 0.7451, + "step": 2696, + "time": 13.12 + }, + { + "epoch": 2.59, + "learning_rate": "1.5556e-04", + "loss": 0.6314, + "slid_loss": 0.7442, + "step": 2697, + "time": 11.83 + }, + { + "epoch": 2.59, + "learning_rate": "1.5552e-04", + "loss": 0.6901, + "slid_loss": 0.7438, + "step": 2698, + "time": 12.82 + }, + { + "epoch": 2.59, + "learning_rate": "1.5549e-04", + "loss": 0.7755, + "slid_loss": 0.7444, + "step": 2699, + "time": 13.69 + }, + { + "epoch": 2.59, + "learning_rate": "1.5546e-04", + "loss": 0.6637, + "slid_loss": 0.743, + "step": 2700, + "time": 13.17 + }, + { + "epoch": 2.59, + "learning_rate": "1.5542e-04", + "loss": 0.8759, + "slid_loss": 0.7441, + "step": 2701, + "time": 13.27 + }, + { + "epoch": 2.6, + "learning_rate": "1.5539e-04", + "loss": 0.6961, + "slid_loss": 0.7437, + "step": 2702, + "time": 12.88 + }, + { + "epoch": 2.6, + "learning_rate": "1.5536e-04", + "loss": 0.8323, + "slid_loss": 0.7441, + "step": 2703, + "time": 12.97 + }, + { + "epoch": 2.6, + "learning_rate": "1.5532e-04", + "loss": 0.7245, + "slid_loss": 0.7443, + "step": 2704, + "time": 12.83 + }, + { + "epoch": 2.6, + "learning_rate": "1.5529e-04", + "loss": 0.7406, + "slid_loss": 0.7431, + "step": 2705, + "time": 11.41 + }, + { + "epoch": 2.6, + "learning_rate": "1.5526e-04", + "loss": 0.6497, + "slid_loss": 0.7425, + "step": 2706, + "time": 12.99 + }, + { + "epoch": 2.6, + "learning_rate": "1.5522e-04", + "loss": 0.7763, + "slid_loss": 0.7426, + "step": 2707, + "time": 13.22 + }, + { + "epoch": 2.6, + "learning_rate": "1.5519e-04", + "loss": 0.8114, + "slid_loss": 0.7431, + "step": 2708, + 
"time": 13.42 + }, + { + "epoch": 2.6, + "learning_rate": "1.5516e-04", + "loss": 0.7379, + "slid_loss": 0.7427, + "step": 2709, + "time": 13.53 + }, + { + "epoch": 2.6, + "learning_rate": "1.5512e-04", + "loss": 0.8181, + "slid_loss": 0.743, + "step": 2710, + "time": 10.93 + }, + { + "epoch": 2.6, + "learning_rate": "1.5509e-04", + "loss": 0.6495, + "slid_loss": 0.7426, + "step": 2711, + "time": 13.27 + }, + { + "epoch": 2.61, + "learning_rate": "1.5506e-04", + "loss": 0.8279, + "slid_loss": 0.7432, + "step": 2712, + "time": 13.43 + }, + { + "epoch": 2.61, + "learning_rate": "1.5502e-04", + "loss": 0.7724, + "slid_loss": 0.7428, + "step": 2713, + "time": 13.7 + }, + { + "epoch": 2.61, + "learning_rate": "1.5499e-04", + "loss": 0.6782, + "slid_loss": 0.7421, + "step": 2714, + "time": 11.71 + }, + { + "epoch": 2.61, + "learning_rate": "1.5496e-04", + "loss": 0.7126, + "slid_loss": 0.7415, + "step": 2715, + "time": 11.73 + }, + { + "epoch": 2.61, + "learning_rate": "1.5492e-04", + "loss": 0.6476, + "slid_loss": 0.7414, + "step": 2716, + "time": 13.64 + }, + { + "epoch": 2.61, + "learning_rate": "1.5489e-04", + "loss": 0.6864, + "slid_loss": 0.7417, + "step": 2717, + "time": 13.59 + }, + { + "epoch": 2.61, + "learning_rate": "1.5485e-04", + "loss": 0.7045, + "slid_loss": 0.7419, + "step": 2718, + "time": 13.48 + }, + { + "epoch": 2.61, + "learning_rate": "1.5482e-04", + "loss": 0.6649, + "slid_loss": 0.7406, + "step": 2719, + "time": 14.0 + }, + { + "epoch": 2.61, + "learning_rate": "1.5479e-04", + "loss": 0.6917, + "slid_loss": 0.7394, + "step": 2720, + "time": 12.27 + }, + { + "epoch": 2.61, + "learning_rate": "1.5475e-04", + "loss": 0.7116, + "slid_loss": 0.7385, + "step": 2721, + "time": 14.17 + }, + { + "epoch": 2.61, + "learning_rate": "1.5472e-04", + "loss": 0.7193, + "slid_loss": 0.7378, + "step": 2722, + "time": 13.13 + }, + { + "epoch": 2.62, + "learning_rate": "1.5469e-04", + "loss": 0.7463, + "slid_loss": 0.7369, + "step": 2723, + "time": 13.46 + }, + { + 
"epoch": 2.62, + "learning_rate": "1.5465e-04", + "loss": 0.7429, + "slid_loss": 0.738, + "step": 2724, + "time": 13.53 + }, + { + "epoch": 2.62, + "learning_rate": "1.5462e-04", + "loss": 0.6853, + "slid_loss": 0.7375, + "step": 2725, + "time": 13.25 + }, + { + "epoch": 2.62, + "learning_rate": "1.5459e-04", + "loss": 0.8209, + "slid_loss": 0.7386, + "step": 2726, + "time": 13.9 + }, + { + "epoch": 2.62, + "learning_rate": "1.5455e-04", + "loss": 0.7261, + "slid_loss": 0.7379, + "step": 2727, + "time": 13.43 + }, + { + "epoch": 2.62, + "learning_rate": "1.5452e-04", + "loss": 0.6481, + "slid_loss": 0.7377, + "step": 2728, + "time": 13.54 + }, + { + "epoch": 2.62, + "learning_rate": "1.5449e-04", + "loss": 0.8078, + "slid_loss": 0.7389, + "step": 2729, + "time": 14.13 + }, + { + "epoch": 2.62, + "learning_rate": "1.5445e-04", + "loss": 0.6865, + "slid_loss": 0.7376, + "step": 2730, + "time": 13.55 + }, + { + "epoch": 2.62, + "learning_rate": "1.5442e-04", + "loss": 0.6555, + "slid_loss": 0.7364, + "step": 2731, + "time": 13.28 + }, + { + "epoch": 2.62, + "learning_rate": "1.5439e-04", + "loss": 0.6904, + "slid_loss": 0.7351, + "step": 2732, + "time": 13.79 + }, + { + "epoch": 2.63, + "learning_rate": "1.5435e-04", + "loss": 0.7761, + "slid_loss": 0.735, + "step": 2733, + "time": 13.6 + }, + { + "epoch": 2.63, + "learning_rate": "1.5432e-04", + "loss": 0.7068, + "slid_loss": 0.7348, + "step": 2734, + "time": 12.99 + }, + { + "epoch": 2.63, + "learning_rate": "1.5429e-04", + "loss": 0.6965, + "slid_loss": 0.734, + "step": 2735, + "time": 11.67 + }, + { + "epoch": 2.63, + "learning_rate": "1.5425e-04", + "loss": 0.8568, + "slid_loss": 0.7346, + "step": 2736, + "time": 11.45 + }, + { + "epoch": 2.63, + "learning_rate": "1.5422e-04", + "loss": 0.8005, + "slid_loss": 0.7361, + "step": 2737, + "time": 13.22 + }, + { + "epoch": 2.63, + "learning_rate": "1.5419e-04", + "loss": 0.7905, + "slid_loss": 0.7361, + "step": 2738, + "time": 14.06 + }, + { + "epoch": 2.63, + 
"learning_rate": "1.5415e-04", + "loss": 0.6963, + "slid_loss": 0.7365, + "step": 2739, + "time": 13.65 + }, + { + "epoch": 2.63, + "learning_rate": "1.5412e-04", + "loss": 0.6523, + "slid_loss": 0.7343, + "step": 2740, + "time": 13.29 + }, + { + "epoch": 2.63, + "learning_rate": "1.5409e-04", + "loss": 0.8105, + "slid_loss": 0.7351, + "step": 2741, + "time": 13.33 + }, + { + "epoch": 2.63, + "learning_rate": "1.5405e-04", + "loss": 0.7098, + "slid_loss": 0.735, + "step": 2742, + "time": 13.77 + }, + { + "epoch": 2.63, + "learning_rate": "1.5402e-04", + "loss": 0.7424, + "slid_loss": 0.7344, + "step": 2743, + "time": 11.6 + }, + { + "epoch": 2.64, + "learning_rate": "1.5399e-04", + "loss": 0.8228, + "slid_loss": 0.7347, + "step": 2744, + "time": 12.91 + }, + { + "epoch": 2.64, + "learning_rate": "1.5395e-04", + "loss": 0.7302, + "slid_loss": 0.7346, + "step": 2745, + "time": 13.34 + }, + { + "epoch": 2.64, + "learning_rate": "1.5392e-04", + "loss": 0.7406, + "slid_loss": 0.7347, + "step": 2746, + "time": 13.81 + }, + { + "epoch": 2.64, + "learning_rate": "1.5389e-04", + "loss": 0.7007, + "slid_loss": 0.7347, + "step": 2747, + "time": 13.4 + }, + { + "epoch": 2.64, + "learning_rate": "1.5385e-04", + "loss": 0.6864, + "slid_loss": 0.7333, + "step": 2748, + "time": 13.65 + }, + { + "epoch": 2.64, + "learning_rate": "1.5382e-04", + "loss": 0.8617, + "slid_loss": 0.7346, + "step": 2749, + "time": 12.96 + }, + { + "epoch": 2.64, + "learning_rate": "1.5379e-04", + "loss": 0.7864, + "slid_loss": 0.7347, + "step": 2750, + "time": 13.68 + }, + { + "epoch": 2.64, + "learning_rate": "1.5375e-04", + "loss": 0.7584, + "slid_loss": 0.7353, + "step": 2751, + "time": 11.16 + }, + { + "epoch": 2.64, + "learning_rate": "1.5372e-04", + "loss": 0.7596, + "slid_loss": 0.7353, + "step": 2752, + "time": 14.07 + }, + { + "epoch": 2.64, + "learning_rate": "1.5369e-04", + "loss": 0.7005, + "slid_loss": 0.7357, + "step": 2753, + "time": 13.4 + }, + { + "epoch": 2.65, + "learning_rate": 
"1.5365e-04", + "loss": 0.8186, + "slid_loss": 0.7365, + "step": 2754, + "time": 13.67 + }, + { + "epoch": 2.65, + "learning_rate": "1.5362e-04", + "loss": 0.6628, + "slid_loss": 0.7357, + "step": 2755, + "time": 13.37 + }, + { + "epoch": 2.65, + "learning_rate": "1.5359e-04", + "loss": 0.7217, + "slid_loss": 0.7356, + "step": 2756, + "time": 13.34 + }, + { + "epoch": 2.65, + "learning_rate": "1.5355e-04", + "loss": 0.7855, + "slid_loss": 0.7368, + "step": 2757, + "time": 11.57 + }, + { + "epoch": 2.65, + "learning_rate": "1.5352e-04", + "loss": 0.7084, + "slid_loss": 0.7367, + "step": 2758, + "time": 13.37 + }, + { + "epoch": 2.65, + "learning_rate": "1.5348e-04", + "loss": 0.709, + "slid_loss": 0.7356, + "step": 2759, + "time": 13.83 + }, + { + "epoch": 2.65, + "learning_rate": "1.5345e-04", + "loss": 0.6412, + "slid_loss": 0.7361, + "step": 2760, + "time": 13.37 + }, + { + "epoch": 2.65, + "learning_rate": "1.5342e-04", + "loss": 0.7029, + "slid_loss": 0.7357, + "step": 2761, + "time": 11.96 + }, + { + "epoch": 2.65, + "learning_rate": "1.5338e-04", + "loss": 0.7364, + "slid_loss": 0.7351, + "step": 2762, + "time": 13.85 + }, + { + "epoch": 2.65, + "learning_rate": "1.5335e-04", + "loss": 0.7596, + "slid_loss": 0.7353, + "step": 2763, + "time": 12.19 + }, + { + "epoch": 2.66, + "learning_rate": "1.5332e-04", + "loss": 0.8489, + "slid_loss": 0.7364, + "step": 2764, + "time": 13.46 + }, + { + "epoch": 2.66, + "learning_rate": "1.5328e-04", + "loss": 0.7683, + "slid_loss": 0.7367, + "step": 2765, + "time": 11.91 + }, + { + "epoch": 2.66, + "learning_rate": "1.5325e-04", + "loss": 0.742, + "slid_loss": 0.7367, + "step": 2766, + "time": 12.6 + }, + { + "epoch": 2.66, + "learning_rate": "1.5322e-04", + "loss": 0.7009, + "slid_loss": 0.7353, + "step": 2767, + "time": 13.71 + }, + { + "epoch": 2.66, + "learning_rate": "1.5318e-04", + "loss": 0.6136, + "slid_loss": 0.7348, + "step": 2768, + "time": 13.55 + }, + { + "epoch": 2.66, + "learning_rate": "1.5315e-04", + 
"loss": 0.7672, + "slid_loss": 0.7351, + "step": 2769, + "time": 13.58 + }, + { + "epoch": 2.66, + "learning_rate": "1.5312e-04", + "loss": 0.7279, + "slid_loss": 0.7346, + "step": 2770, + "time": 14.4 + }, + { + "epoch": 2.66, + "learning_rate": "1.5308e-04", + "loss": 0.7057, + "slid_loss": 0.735, + "step": 2771, + "time": 13.97 + }, + { + "epoch": 2.66, + "learning_rate": "1.5305e-04", + "loss": 0.7315, + "slid_loss": 0.735, + "step": 2772, + "time": 12.06 + }, + { + "epoch": 2.66, + "learning_rate": "1.5302e-04", + "loss": 0.7345, + "slid_loss": 0.7344, + "step": 2773, + "time": 11.48 + }, + { + "epoch": 2.66, + "learning_rate": "1.5298e-04", + "loss": 0.6731, + "slid_loss": 0.7337, + "step": 2774, + "time": 13.87 + }, + { + "epoch": 2.67, + "learning_rate": "1.5295e-04", + "loss": 0.6577, + "slid_loss": 0.733, + "step": 2775, + "time": 13.4 + }, + { + "epoch": 2.67, + "learning_rate": "1.5292e-04", + "loss": 0.7373, + "slid_loss": 0.7316, + "step": 2776, + "time": 13.4 + }, + { + "epoch": 2.67, + "learning_rate": "1.5288e-04", + "loss": 0.7546, + "slid_loss": 0.7315, + "step": 2777, + "time": 13.41 + }, + { + "epoch": 2.67, + "learning_rate": "1.5285e-04", + "loss": 0.632, + "slid_loss": 0.7288, + "step": 2778, + "time": 12.21 + }, + { + "epoch": 2.67, + "learning_rate": "1.5282e-04", + "loss": 0.7545, + "slid_loss": 0.729, + "step": 2779, + "time": 12.89 + }, + { + "epoch": 2.67, + "learning_rate": "1.5278e-04", + "loss": 0.7351, + "slid_loss": 0.7294, + "step": 2780, + "time": 14.14 + }, + { + "epoch": 2.67, + "learning_rate": "1.5275e-04", + "loss": 0.7375, + "slid_loss": 0.7296, + "step": 2781, + "time": 13.26 + }, + { + "epoch": 2.67, + "learning_rate": "1.5272e-04", + "loss": 0.7292, + "slid_loss": 0.7296, + "step": 2782, + "time": 12.92 + }, + { + "epoch": 2.67, + "learning_rate": "1.5268e-04", + "loss": 0.721, + "slid_loss": 0.7295, + "step": 2783, + "time": 13.45 + }, + { + "epoch": 2.67, + "learning_rate": "1.5265e-04", + "loss": 0.727, + 
"slid_loss": 0.7305, + "step": 2784, + "time": 11.08 + }, + { + "epoch": 2.68, + "learning_rate": "1.5261e-04", + "loss": 0.6365, + "slid_loss": 0.7301, + "step": 2785, + "time": 13.73 + }, + { + "epoch": 2.68, + "learning_rate": "1.5258e-04", + "loss": 0.7794, + "slid_loss": 0.7295, + "step": 2786, + "time": 12.68 + }, + { + "epoch": 2.68, + "learning_rate": "1.5255e-04", + "loss": 0.7507, + "slid_loss": 0.73, + "step": 2787, + "time": 12.89 + }, + { + "epoch": 2.68, + "learning_rate": "1.5251e-04", + "loss": 0.716, + "slid_loss": 0.7302, + "step": 2788, + "time": 13.66 + }, + { + "epoch": 2.68, + "learning_rate": "1.5248e-04", + "loss": 0.7236, + "slid_loss": 0.7303, + "step": 2789, + "time": 12.43 + }, + { + "epoch": 2.68, + "learning_rate": "1.5245e-04", + "loss": 0.755, + "slid_loss": 0.7311, + "step": 2790, + "time": 13.33 + }, + { + "epoch": 2.68, + "learning_rate": "1.5241e-04", + "loss": 0.7382, + "slid_loss": 0.7307, + "step": 2791, + "time": 13.39 + }, + { + "epoch": 2.68, + "learning_rate": "1.5238e-04", + "loss": 0.6568, + "slid_loss": 0.7299, + "step": 2792, + "time": 13.3 + }, + { + "epoch": 2.68, + "learning_rate": "1.5235e-04", + "loss": 0.7227, + "slid_loss": 0.7294, + "step": 2793, + "time": 12.2 + }, + { + "epoch": 2.68, + "learning_rate": "1.5231e-04", + "loss": 0.7255, + "slid_loss": 0.7301, + "step": 2794, + "time": 11.74 + }, + { + "epoch": 2.68, + "learning_rate": "1.5228e-04", + "loss": 0.709, + "slid_loss": 0.7303, + "step": 2795, + "time": 12.85 + }, + { + "epoch": 2.69, + "learning_rate": "1.5225e-04", + "loss": 0.8038, + "slid_loss": 0.7305, + "step": 2796, + "time": 11.36 + }, + { + "epoch": 2.69, + "learning_rate": "1.5221e-04", + "loss": 0.8184, + "slid_loss": 0.7324, + "step": 2797, + "time": 14.33 + }, + { + "epoch": 2.69, + "learning_rate": "1.5218e-04", + "loss": 0.6162, + "slid_loss": 0.7316, + "step": 2798, + "time": 13.86 + }, + { + "epoch": 2.69, + "learning_rate": "1.5215e-04", + "loss": 0.799, + "slid_loss": 0.7319, + 
"step": 2799, + "time": 11.63 + }, + { + "epoch": 2.69, + "learning_rate": "1.5211e-04", + "loss": 0.6719, + "slid_loss": 0.7319, + "step": 2800, + "time": 13.03 + }, + { + "epoch": 2.69, + "learning_rate": "1.5208e-04", + "loss": 0.702, + "slid_loss": 0.7302, + "step": 2801, + "time": 13.73 + }, + { + "epoch": 2.69, + "learning_rate": "1.5205e-04", + "loss": 0.6694, + "slid_loss": 0.7299, + "step": 2802, + "time": 13.86 + }, + { + "epoch": 2.69, + "learning_rate": "1.5201e-04", + "loss": 0.6669, + "slid_loss": 0.7283, + "step": 2803, + "time": 12.87 + }, + { + "epoch": 2.69, + "learning_rate": "1.5198e-04", + "loss": 0.7653, + "slid_loss": 0.7287, + "step": 2804, + "time": 13.45 + }, + { + "epoch": 2.69, + "learning_rate": "1.5194e-04", + "loss": 0.728, + "slid_loss": 0.7286, + "step": 2805, + "time": 12.89 + }, + { + "epoch": 2.7, + "learning_rate": "1.5191e-04", + "loss": 0.7077, + "slid_loss": 0.7291, + "step": 2806, + "time": 13.18 + }, + { + "epoch": 2.7, + "learning_rate": "1.5188e-04", + "loss": 0.7786, + "slid_loss": 0.7292, + "step": 2807, + "time": 11.83 + }, + { + "epoch": 2.7, + "learning_rate": "1.5184e-04", + "loss": 0.6313, + "slid_loss": 0.7274, + "step": 2808, + "time": 12.81 + }, + { + "epoch": 2.7, + "learning_rate": "1.5181e-04", + "loss": 0.7382, + "slid_loss": 0.7274, + "step": 2809, + "time": 14.42 + }, + { + "epoch": 2.7, + "learning_rate": "1.5178e-04", + "loss": 0.7741, + "slid_loss": 0.7269, + "step": 2810, + "time": 13.27 + }, + { + "epoch": 2.7, + "learning_rate": "1.5174e-04", + "loss": 0.7427, + "slid_loss": 0.7279, + "step": 2811, + "time": 12.01 + }, + { + "epoch": 2.7, + "learning_rate": "1.5171e-04", + "loss": 0.6997, + "slid_loss": 0.7266, + "step": 2812, + "time": 11.5 + }, + { + "epoch": 2.7, + "learning_rate": "1.5168e-04", + "loss": 0.7278, + "slid_loss": 0.7261, + "step": 2813, + "time": 13.17 + }, + { + "epoch": 2.7, + "learning_rate": "1.5164e-04", + "loss": 0.7322, + "slid_loss": 0.7267, + "step": 2814, + "time": 13.01 + 
}, + { + "epoch": 2.7, + "learning_rate": "1.5161e-04", + "loss": 0.7417, + "slid_loss": 0.727, + "step": 2815, + "time": 13.39 + }, + { + "epoch": 2.71, + "learning_rate": "1.5158e-04", + "loss": 0.6702, + "slid_loss": 0.7272, + "step": 2816, + "time": 12.71 + }, + { + "epoch": 2.71, + "learning_rate": "1.5154e-04", + "loss": 0.7166, + "slid_loss": 0.7275, + "step": 2817, + "time": 14.09 + }, + { + "epoch": 2.71, + "learning_rate": "1.5151e-04", + "loss": 0.7633, + "slid_loss": 0.7281, + "step": 2818, + "time": 11.45 + }, + { + "epoch": 2.71, + "learning_rate": "1.5148e-04", + "loss": 0.8231, + "slid_loss": 0.7297, + "step": 2819, + "time": 12.11 + }, + { + "epoch": 2.71, + "learning_rate": "1.5144e-04", + "loss": 0.7897, + "slid_loss": 0.7306, + "step": 2820, + "time": 13.19 + }, + { + "epoch": 2.71, + "learning_rate": "1.5141e-04", + "loss": 0.6531, + "slid_loss": 0.7301, + "step": 2821, + "time": 13.76 + }, + { + "epoch": 2.71, + "learning_rate": "1.5137e-04", + "loss": 0.7633, + "slid_loss": 0.7305, + "step": 2822, + "time": 13.58 + }, + { + "epoch": 2.71, + "learning_rate": "1.5134e-04", + "loss": 0.7941, + "slid_loss": 0.731, + "step": 2823, + "time": 12.29 + }, + { + "epoch": 2.71, + "learning_rate": "1.5131e-04", + "loss": 0.7046, + "slid_loss": 0.7306, + "step": 2824, + "time": 14.25 + }, + { + "epoch": 2.71, + "learning_rate": "1.5127e-04", + "loss": 0.6832, + "slid_loss": 0.7306, + "step": 2825, + "time": 12.21 + }, + { + "epoch": 2.71, + "learning_rate": "1.5124e-04", + "loss": 0.7824, + "slid_loss": 0.7302, + "step": 2826, + "time": 12.01 + }, + { + "epoch": 2.72, + "learning_rate": "1.5121e-04", + "loss": 0.6516, + "slid_loss": 0.7294, + "step": 2827, + "time": 11.27 + }, + { + "epoch": 2.72, + "learning_rate": "1.5117e-04", + "loss": 0.8109, + "slid_loss": 0.7311, + "step": 2828, + "time": 13.88 + }, + { + "epoch": 2.72, + "learning_rate": "1.5114e-04", + "loss": 0.6289, + "slid_loss": 0.7293, + "step": 2829, + "time": 13.57 + }, + { + "epoch": 
2.72, + "learning_rate": "1.5111e-04", + "loss": 0.6821, + "slid_loss": 0.7292, + "step": 2830, + "time": 12.54 + }, + { + "epoch": 2.72, + "learning_rate": "1.5107e-04", + "loss": 0.7099, + "slid_loss": 0.7298, + "step": 2831, + "time": 14.01 + }, + { + "epoch": 2.72, + "learning_rate": "1.5104e-04", + "loss": 0.7555, + "slid_loss": 0.7304, + "step": 2832, + "time": 12.95 + }, + { + "epoch": 2.72, + "learning_rate": "1.5101e-04", + "loss": 0.7428, + "slid_loss": 0.7301, + "step": 2833, + "time": 14.97 + }, + { + "epoch": 2.72, + "learning_rate": "1.5097e-04", + "loss": 0.6829, + "slid_loss": 0.7299, + "step": 2834, + "time": 13.71 + }, + { + "epoch": 2.72, + "learning_rate": "1.5094e-04", + "loss": 0.7564, + "slid_loss": 0.7305, + "step": 2835, + "time": 12.17 + }, + { + "epoch": 2.72, + "learning_rate": "1.5091e-04", + "loss": 0.7232, + "slid_loss": 0.7291, + "step": 2836, + "time": 14.43 + }, + { + "epoch": 2.73, + "learning_rate": "1.5087e-04", + "loss": 0.6621, + "slid_loss": 0.7277, + "step": 2837, + "time": 13.63 + }, + { + "epoch": 2.73, + "learning_rate": "1.5084e-04", + "loss": 0.7093, + "slid_loss": 0.7269, + "step": 2838, + "time": 14.17 + }, + { + "epoch": 2.73, + "learning_rate": "1.5080e-04", + "loss": 0.7745, + "slid_loss": 0.7277, + "step": 2839, + "time": 13.44 + }, + { + "epoch": 2.73, + "learning_rate": "1.5077e-04", + "loss": 0.6835, + "slid_loss": 0.728, + "step": 2840, + "time": 13.7 + }, + { + "epoch": 2.73, + "learning_rate": "1.5074e-04", + "loss": 0.69, + "slid_loss": 0.7268, + "step": 2841, + "time": 12.87 + }, + { + "epoch": 2.73, + "learning_rate": "1.5070e-04", + "loss": 0.7674, + "slid_loss": 0.7274, + "step": 2842, + "time": 11.53 + }, + { + "epoch": 2.73, + "learning_rate": "1.5067e-04", + "loss": 0.7321, + "slid_loss": 0.7273, + "step": 2843, + "time": 13.17 + }, + { + "epoch": 2.73, + "learning_rate": "1.5064e-04", + "loss": 0.7813, + "slid_loss": 0.7269, + "step": 2844, + "time": 13.53 + }, + { + "epoch": 2.73, + 
"learning_rate": "1.5060e-04", + "loss": 0.7012, + "slid_loss": 0.7266, + "step": 2845, + "time": 13.71 + }, + { + "epoch": 2.73, + "learning_rate": "1.5057e-04", + "loss": 0.6337, + "slid_loss": 0.7255, + "step": 2846, + "time": 12.08 + }, + { + "epoch": 2.73, + "learning_rate": "1.5054e-04", + "loss": 0.743, + "slid_loss": 0.7259, + "step": 2847, + "time": 11.94 + }, + { + "epoch": 2.74, + "learning_rate": "1.5050e-04", + "loss": 0.7596, + "slid_loss": 0.7267, + "step": 2848, + "time": 14.0 + }, + { + "epoch": 2.74, + "learning_rate": "1.5047e-04", + "loss": 0.7029, + "slid_loss": 0.7251, + "step": 2849, + "time": 13.99 + }, + { + "epoch": 2.74, + "learning_rate": "1.5044e-04", + "loss": 0.6916, + "slid_loss": 0.7241, + "step": 2850, + "time": 15.0 + }, + { + "epoch": 2.74, + "learning_rate": "1.5040e-04", + "loss": 0.7159, + "slid_loss": 0.7237, + "step": 2851, + "time": 12.12 + }, + { + "epoch": 2.74, + "learning_rate": "1.5037e-04", + "loss": 0.796, + "slid_loss": 0.7241, + "step": 2852, + "time": 13.16 + }, + { + "epoch": 2.74, + "learning_rate": "1.5034e-04", + "loss": 0.8071, + "slid_loss": 0.7251, + "step": 2853, + "time": 13.94 + }, + { + "epoch": 2.74, + "learning_rate": "1.5030e-04", + "loss": 0.586, + "slid_loss": 0.7228, + "step": 2854, + "time": 14.34 + }, + { + "epoch": 2.74, + "learning_rate": "1.5027e-04", + "loss": 0.8893, + "slid_loss": 0.7251, + "step": 2855, + "time": 12.82 + }, + { + "epoch": 2.74, + "learning_rate": "1.5023e-04", + "loss": 0.694, + "slid_loss": 0.7248, + "step": 2856, + "time": 13.42 + }, + { + "epoch": 2.74, + "learning_rate": "1.5020e-04", + "loss": 0.7535, + "slid_loss": 0.7245, + "step": 2857, + "time": 13.67 + }, + { + "epoch": 2.75, + "learning_rate": "1.5017e-04", + "loss": 0.6745, + "slid_loss": 0.7241, + "step": 2858, + "time": 13.75 + }, + { + "epoch": 2.75, + "learning_rate": "1.5013e-04", + "loss": 0.7289, + "slid_loss": 0.7243, + "step": 2859, + "time": 13.41 + }, + { + "epoch": 2.75, + "learning_rate": 
"1.5010e-04", + "loss": 0.7481, + "slid_loss": 0.7254, + "step": 2860, + "time": 11.45 + }, + { + "epoch": 2.75, + "learning_rate": "1.5007e-04", + "loss": 0.7399, + "slid_loss": 0.7258, + "step": 2861, + "time": 12.86 + }, + { + "epoch": 2.75, + "learning_rate": "1.5003e-04", + "loss": 0.7397, + "slid_loss": 0.7258, + "step": 2862, + "time": 13.26 + }, + { + "epoch": 2.75, + "learning_rate": "1.5000e-04", + "loss": 0.7001, + "slid_loss": 0.7252, + "step": 2863, + "time": 13.23 + }, + { + "epoch": 2.75, + "learning_rate": "1.4997e-04", + "loss": 0.7309, + "slid_loss": 0.724, + "step": 2864, + "time": 14.05 + }, + { + "epoch": 2.75, + "learning_rate": "1.4993e-04", + "loss": 0.687, + "slid_loss": 0.7232, + "step": 2865, + "time": 13.37 + }, + { + "epoch": 2.75, + "learning_rate": "1.4990e-04", + "loss": 0.7897, + "slid_loss": 0.7237, + "step": 2866, + "time": 13.66 + }, + { + "epoch": 2.75, + "learning_rate": "1.4987e-04", + "loss": 0.7596, + "slid_loss": 0.7243, + "step": 2867, + "time": 13.34 + }, + { + "epoch": 2.76, + "learning_rate": "1.4983e-04", + "loss": 0.6284, + "slid_loss": 0.7244, + "step": 2868, + "time": 13.55 + }, + { + "epoch": 2.76, + "learning_rate": "1.4980e-04", + "loss": 0.6644, + "slid_loss": 0.7234, + "step": 2869, + "time": 14.02 + }, + { + "epoch": 2.76, + "learning_rate": "1.4977e-04", + "loss": 0.7921, + "slid_loss": 0.724, + "step": 2870, + "time": 13.56 + }, + { + "epoch": 2.76, + "learning_rate": "1.4973e-04", + "loss": 0.7336, + "slid_loss": 0.7243, + "step": 2871, + "time": 11.18 + }, + { + "epoch": 2.76, + "learning_rate": "1.4970e-04", + "loss": 0.6055, + "slid_loss": 0.7231, + "step": 2872, + "time": 13.62 + }, + { + "epoch": 2.76, + "learning_rate": "1.4966e-04", + "loss": 0.6651, + "slid_loss": 0.7224, + "step": 2873, + "time": 12.01 + }, + { + "epoch": 2.76, + "learning_rate": "1.4963e-04", + "loss": 0.7794, + "slid_loss": 0.7234, + "step": 2874, + "time": 13.64 + }, + { + "epoch": 2.76, + "learning_rate": "1.4960e-04", + 
"loss": 0.6732, + "slid_loss": 0.7236, + "step": 2875, + "time": 13.85 + }, + { + "epoch": 2.76, + "learning_rate": "1.4956e-04", + "loss": 0.735, + "slid_loss": 0.7236, + "step": 2876, + "time": 12.95 + }, + { + "epoch": 2.76, + "learning_rate": "1.4953e-04", + "loss": 0.7031, + "slid_loss": 0.723, + "step": 2877, + "time": 13.45 + }, + { + "epoch": 2.76, + "learning_rate": "1.4950e-04", + "loss": 0.7276, + "slid_loss": 0.724, + "step": 2878, + "time": 13.15 + }, + { + "epoch": 2.77, + "learning_rate": "1.4946e-04", + "loss": 0.7249, + "slid_loss": 0.7237, + "step": 2879, + "time": 13.37 + }, + { + "epoch": 2.77, + "learning_rate": "1.4943e-04", + "loss": 0.6236, + "slid_loss": 0.7226, + "step": 2880, + "time": 12.91 + }, + { + "epoch": 2.77, + "learning_rate": "1.4940e-04", + "loss": 0.7623, + "slid_loss": 0.7228, + "step": 2881, + "time": 11.95 + }, + { + "epoch": 2.77, + "learning_rate": "1.4936e-04", + "loss": 0.6503, + "slid_loss": 0.7221, + "step": 2882, + "time": 12.74 + }, + { + "epoch": 2.77, + "learning_rate": "1.4933e-04", + "loss": 0.7243, + "slid_loss": 0.7221, + "step": 2883, + "time": 12.12 + }, + { + "epoch": 2.77, + "learning_rate": "1.4930e-04", + "loss": 0.6631, + "slid_loss": 0.7214, + "step": 2884, + "time": 13.81 + }, + { + "epoch": 2.77, + "learning_rate": "1.4926e-04", + "loss": 0.6918, + "slid_loss": 0.722, + "step": 2885, + "time": 13.71 + }, + { + "epoch": 2.77, + "learning_rate": "1.4923e-04", + "loss": 0.629, + "slid_loss": 0.7205, + "step": 2886, + "time": 12.77 + }, + { + "epoch": 2.77, + "learning_rate": "1.4920e-04", + "loss": 0.7635, + "slid_loss": 0.7206, + "step": 2887, + "time": 13.13 + }, + { + "epoch": 2.77, + "learning_rate": "1.4916e-04", + "loss": 0.6988, + "slid_loss": 0.7205, + "step": 2888, + "time": 12.67 + }, + { + "epoch": 2.78, + "learning_rate": "1.4913e-04", + "loss": 0.735, + "slid_loss": 0.7206, + "step": 2889, + "time": 12.66 + }, + { + "epoch": 2.78, + "learning_rate": "1.4909e-04", + "loss": 0.7766, + 
"slid_loss": 0.7208, + "step": 2890, + "time": 13.48 + }, + { + "epoch": 2.78, + "learning_rate": "1.4906e-04", + "loss": 0.6878, + "slid_loss": 0.7203, + "step": 2891, + "time": 14.06 + }, + { + "epoch": 2.78, + "learning_rate": "1.4903e-04", + "loss": 0.6556, + "slid_loss": 0.7203, + "step": 2892, + "time": 13.37 + }, + { + "epoch": 2.78, + "learning_rate": "1.4899e-04", + "loss": 0.6911, + "slid_loss": 0.7199, + "step": 2893, + "time": 12.88 + }, + { + "epoch": 2.78, + "learning_rate": "1.4896e-04", + "loss": 0.7247, + "slid_loss": 0.7199, + "step": 2894, + "time": 13.39 + }, + { + "epoch": 2.78, + "learning_rate": "1.4893e-04", + "loss": 0.6661, + "slid_loss": 0.7195, + "step": 2895, + "time": 10.82 + }, + { + "epoch": 2.78, + "learning_rate": "1.4889e-04", + "loss": 0.7388, + "slid_loss": 0.7189, + "step": 2896, + "time": 12.81 + }, + { + "epoch": 2.78, + "learning_rate": "1.4886e-04", + "loss": 0.7086, + "slid_loss": 0.7178, + "step": 2897, + "time": 12.5 + }, + { + "epoch": 2.78, + "learning_rate": "1.4883e-04", + "loss": 0.6925, + "slid_loss": 0.7185, + "step": 2898, + "time": 13.79 + }, + { + "epoch": 2.78, + "learning_rate": "1.4879e-04", + "loss": 0.6242, + "slid_loss": 0.7168, + "step": 2899, + "time": 13.29 + }, + { + "epoch": 2.79, + "learning_rate": "1.4876e-04", + "loss": 0.7268, + "slid_loss": 0.7173, + "step": 2900, + "time": 11.32 + }, + { + "epoch": 2.79, + "learning_rate": "1.4873e-04", + "loss": 0.6174, + "slid_loss": 0.7165, + "step": 2901, + "time": 13.53 + }, + { + "epoch": 2.79, + "learning_rate": "1.4869e-04", + "loss": 0.7558, + "slid_loss": 0.7173, + "step": 2902, + "time": 11.51 + }, + { + "epoch": 2.79, + "learning_rate": "1.4866e-04", + "loss": 0.7052, + "slid_loss": 0.7177, + "step": 2903, + "time": 12.78 + }, + { + "epoch": 2.79, + "learning_rate": "1.4863e-04", + "loss": 0.7868, + "slid_loss": 0.7179, + "step": 2904, + "time": 12.42 + }, + { + "epoch": 2.79, + "learning_rate": "1.4859e-04", + "loss": 0.6797, + "slid_loss": 0.7175, 
+ "step": 2905, + "time": 13.65 + }, + { + "epoch": 2.79, + "learning_rate": "1.4856e-04", + "loss": 0.7142, + "slid_loss": 0.7175, + "step": 2906, + "time": 13.45 + }, + { + "epoch": 2.79, + "learning_rate": "1.4852e-04", + "loss": 0.6828, + "slid_loss": 0.7166, + "step": 2907, + "time": 11.47 + }, + { + "epoch": 2.79, + "learning_rate": "1.4849e-04", + "loss": 0.6812, + "slid_loss": 0.7171, + "step": 2908, + "time": 12.98 + }, + { + "epoch": 2.79, + "learning_rate": "1.4846e-04", + "loss": 0.7057, + "slid_loss": 0.7167, + "step": 2909, + "time": 12.96 + }, + { + "epoch": 2.8, + "learning_rate": "1.4842e-04", + "loss": 0.663, + "slid_loss": 0.7156, + "step": 2910, + "time": 12.82 + }, + { + "epoch": 2.8, + "learning_rate": "1.4839e-04", + "loss": 0.6505, + "slid_loss": 0.7147, + "step": 2911, + "time": 13.29 + }, + { + "epoch": 2.8, + "learning_rate": "1.4836e-04", + "loss": 0.7331, + "slid_loss": 0.715, + "step": 2912, + "time": 12.89 + }, + { + "epoch": 2.8, + "learning_rate": "1.4832e-04", + "loss": 0.6837, + "slid_loss": 0.7146, + "step": 2913, + "time": 13.24 + }, + { + "epoch": 2.8, + "learning_rate": "1.4829e-04", + "loss": 0.6864, + "slid_loss": 0.7141, + "step": 2914, + "time": 13.55 + }, + { + "epoch": 2.8, + "learning_rate": "1.4826e-04", + "loss": 0.7246, + "slid_loss": 0.714, + "step": 2915, + "time": 13.22 + }, + { + "epoch": 2.8, + "learning_rate": "1.4822e-04", + "loss": 0.621, + "slid_loss": 0.7135, + "step": 2916, + "time": 13.33 + }, + { + "epoch": 2.8, + "learning_rate": "1.4819e-04", + "loss": 0.6926, + "slid_loss": 0.7132, + "step": 2917, + "time": 13.64 + }, + { + "epoch": 2.8, + "learning_rate": "1.4816e-04", + "loss": 0.695, + "slid_loss": 0.7126, + "step": 2918, + "time": 14.37 + }, + { + "epoch": 2.8, + "learning_rate": "1.4812e-04", + "loss": 0.6725, + "slid_loss": 0.711, + "step": 2919, + "time": 13.52 + }, + { + "epoch": 2.8, + "learning_rate": "1.4809e-04", + "loss": 0.8606, + "slid_loss": 0.7118, + "step": 2920, + "time": 13.16 + }, 
+ { + "epoch": 2.81, + "learning_rate": "1.4806e-04", + "loss": 0.6707, + "slid_loss": 0.7119, + "step": 2921, + "time": 13.06 + }, + { + "epoch": 2.81, + "learning_rate": "1.4802e-04", + "loss": 0.7499, + "slid_loss": 0.7118, + "step": 2922, + "time": 11.85 + }, + { + "epoch": 2.81, + "learning_rate": "1.4799e-04", + "loss": 0.7896, + "slid_loss": 0.7118, + "step": 2923, + "time": 12.85 + }, + { + "epoch": 2.81, + "learning_rate": "1.4795e-04", + "loss": 0.6163, + "slid_loss": 0.7109, + "step": 2924, + "time": 12.59 + }, + { + "epoch": 2.81, + "learning_rate": "1.4792e-04", + "loss": 0.6991, + "slid_loss": 0.711, + "step": 2925, + "time": 13.79 + }, + { + "epoch": 2.81, + "learning_rate": "1.4789e-04", + "loss": 0.7284, + "slid_loss": 0.7105, + "step": 2926, + "time": 11.4 + }, + { + "epoch": 2.81, + "learning_rate": "1.4785e-04", + "loss": 0.6735, + "slid_loss": 0.7107, + "step": 2927, + "time": 13.12 + }, + { + "epoch": 2.81, + "learning_rate": "1.4782e-04", + "loss": 0.7231, + "slid_loss": 0.7098, + "step": 2928, + "time": 14.26 + }, + { + "epoch": 2.81, + "learning_rate": "1.4779e-04", + "loss": 0.7292, + "slid_loss": 0.7108, + "step": 2929, + "time": 13.19 + }, + { + "epoch": 2.81, + "learning_rate": "1.4775e-04", + "loss": 0.7119, + "slid_loss": 0.7111, + "step": 2930, + "time": 13.31 + }, + { + "epoch": 2.82, + "learning_rate": "1.4772e-04", + "loss": 0.684, + "slid_loss": 0.7109, + "step": 2931, + "time": 13.0 + }, + { + "epoch": 2.82, + "learning_rate": "1.4769e-04", + "loss": 0.7878, + "slid_loss": 0.7112, + "step": 2932, + "time": 13.84 + }, + { + "epoch": 2.82, + "learning_rate": "1.4765e-04", + "loss": 0.74, + "slid_loss": 0.7112, + "step": 2933, + "time": 13.31 + }, + { + "epoch": 2.82, + "learning_rate": "1.4762e-04", + "loss": 0.5858, + "slid_loss": 0.7102, + "step": 2934, + "time": 12.85 + }, + { + "epoch": 2.82, + "learning_rate": "1.4759e-04", + "loss": 0.7487, + "slid_loss": 0.7101, + "step": 2935, + "time": 13.49 + }, + { + "epoch": 2.82, + 
"learning_rate": "1.4755e-04", + "loss": 0.7344, + "slid_loss": 0.7102, + "step": 2936, + "time": 14.17 + }, + { + "epoch": 2.82, + "learning_rate": "1.4752e-04", + "loss": 0.6938, + "slid_loss": 0.7105, + "step": 2937, + "time": 10.98 + }, + { + "epoch": 2.82, + "learning_rate": "1.4749e-04", + "loss": 0.6819, + "slid_loss": 0.7103, + "step": 2938, + "time": 13.84 + }, + { + "epoch": 2.82, + "learning_rate": "1.4745e-04", + "loss": 0.6859, + "slid_loss": 0.7094, + "step": 2939, + "time": 11.31 + }, + { + "epoch": 2.82, + "learning_rate": "1.4742e-04", + "loss": 0.5853, + "slid_loss": 0.7084, + "step": 2940, + "time": 11.92 + }, + { + "epoch": 2.83, + "learning_rate": "1.4739e-04", + "loss": 0.7303, + "slid_loss": 0.7088, + "step": 2941, + "time": 12.93 + }, + { + "epoch": 2.83, + "learning_rate": "1.4735e-04", + "loss": 0.7024, + "slid_loss": 0.7082, + "step": 2942, + "time": 11.59 + }, + { + "epoch": 2.83, + "learning_rate": "1.4732e-04", + "loss": 0.7075, + "slid_loss": 0.7079, + "step": 2943, + "time": 13.66 + }, + { + "epoch": 2.83, + "learning_rate": "1.4728e-04", + "loss": 0.6617, + "slid_loss": 0.7067, + "step": 2944, + "time": 13.87 + }, + { + "epoch": 2.83, + "learning_rate": "1.4725e-04", + "loss": 0.7589, + "slid_loss": 0.7073, + "step": 2945, + "time": 13.22 + }, + { + "epoch": 2.83, + "learning_rate": "1.4722e-04", + "loss": 0.638, + "slid_loss": 0.7073, + "step": 2946, + "time": 11.57 + }, + { + "epoch": 2.83, + "learning_rate": "1.4718e-04", + "loss": 0.6649, + "slid_loss": 0.7066, + "step": 2947, + "time": 13.47 + }, + { + "epoch": 2.83, + "learning_rate": "1.4715e-04", + "loss": 0.7498, + "slid_loss": 0.7065, + "step": 2948, + "time": 12.92 + }, + { + "epoch": 2.83, + "learning_rate": "1.4712e-04", + "loss": 0.6483, + "slid_loss": 0.7059, + "step": 2949, + "time": 12.25 + }, + { + "epoch": 2.83, + "learning_rate": "1.4708e-04", + "loss": 0.7475, + "slid_loss": 0.7065, + "step": 2950, + "time": 11.68 + }, + { + "epoch": 2.83, + "learning_rate": 
"1.4705e-04", + "loss": 0.7781, + "slid_loss": 0.7071, + "step": 2951, + "time": 13.96 + }, + { + "epoch": 2.84, + "learning_rate": "1.4702e-04", + "loss": 0.6448, + "slid_loss": 0.7056, + "step": 2952, + "time": 11.04 + }, + { + "epoch": 2.84, + "learning_rate": "1.4698e-04", + "loss": 0.6974, + "slid_loss": 0.7045, + "step": 2953, + "time": 11.26 + }, + { + "epoch": 2.84, + "learning_rate": "1.4695e-04", + "loss": 0.6827, + "slid_loss": 0.7055, + "step": 2954, + "time": 12.74 + }, + { + "epoch": 2.84, + "learning_rate": "1.4692e-04", + "loss": 0.7681, + "slid_loss": 0.7042, + "step": 2955, + "time": 14.03 + }, + { + "epoch": 2.84, + "learning_rate": "1.4688e-04", + "loss": 0.7484, + "slid_loss": 0.7048, + "step": 2956, + "time": 11.31 + }, + { + "epoch": 2.84, + "learning_rate": "1.4685e-04", + "loss": 0.7456, + "slid_loss": 0.7047, + "step": 2957, + "time": 14.19 + }, + { + "epoch": 2.84, + "learning_rate": "1.4682e-04", + "loss": 0.7363, + "slid_loss": 0.7053, + "step": 2958, + "time": 13.3 + }, + { + "epoch": 2.84, + "learning_rate": "1.4678e-04", + "loss": 0.7108, + "slid_loss": 0.7051, + "step": 2959, + "time": 11.22 + }, + { + "epoch": 2.84, + "learning_rate": "1.4675e-04", + "loss": 0.7673, + "slid_loss": 0.7053, + "step": 2960, + "time": 13.81 + }, + { + "epoch": 2.84, + "learning_rate": "1.4672e-04", + "loss": 0.7133, + "slid_loss": 0.7051, + "step": 2961, + "time": 13.38 + }, + { + "epoch": 2.85, + "learning_rate": "1.4668e-04", + "loss": 0.7739, + "slid_loss": 0.7054, + "step": 2962, + "time": 13.97 + }, + { + "epoch": 2.85, + "learning_rate": "1.4665e-04", + "loss": 0.7004, + "slid_loss": 0.7054, + "step": 2963, + "time": 12.96 + }, + { + "epoch": 2.85, + "learning_rate": "1.4662e-04", + "loss": 0.756, + "slid_loss": 0.7057, + "step": 2964, + "time": 13.84 + }, + { + "epoch": 2.85, + "learning_rate": "1.4658e-04", + "loss": 0.7557, + "slid_loss": 0.7064, + "step": 2965, + "time": 11.82 + }, + { + "epoch": 2.85, + "learning_rate": "1.4655e-04", + 
"loss": 0.6347, + "slid_loss": 0.7048, + "step": 2966, + "time": 13.94 + }, + { + "epoch": 2.85, + "learning_rate": "1.4652e-04", + "loss": 0.7663, + "slid_loss": 0.7049, + "step": 2967, + "time": 13.49 + }, + { + "epoch": 2.85, + "learning_rate": "1.4648e-04", + "loss": 0.7919, + "slid_loss": 0.7065, + "step": 2968, + "time": 13.15 + }, + { + "epoch": 2.85, + "learning_rate": "1.4645e-04", + "loss": 0.7785, + "slid_loss": 0.7076, + "step": 2969, + "time": 13.57 + }, + { + "epoch": 2.85, + "learning_rate": "1.4641e-04", + "loss": 0.7689, + "slid_loss": 0.7074, + "step": 2970, + "time": 12.85 + }, + { + "epoch": 2.85, + "learning_rate": "1.4638e-04", + "loss": 0.6634, + "slid_loss": 0.7067, + "step": 2971, + "time": 13.23 + }, + { + "epoch": 2.85, + "learning_rate": "1.4635e-04", + "loss": 0.7007, + "slid_loss": 0.7077, + "step": 2972, + "time": 13.21 + }, + { + "epoch": 2.86, + "learning_rate": "1.4631e-04", + "loss": 0.773, + "slid_loss": 0.7087, + "step": 2973, + "time": 12.84 + }, + { + "epoch": 2.86, + "learning_rate": "1.4628e-04", + "loss": 0.6484, + "slid_loss": 0.7074, + "step": 2974, + "time": 13.29 + }, + { + "epoch": 2.86, + "learning_rate": "1.4625e-04", + "loss": 0.7372, + "slid_loss": 0.7081, + "step": 2975, + "time": 13.13 + }, + { + "epoch": 2.86, + "learning_rate": "1.4621e-04", + "loss": 0.6386, + "slid_loss": 0.7071, + "step": 2976, + "time": 12.9 + }, + { + "epoch": 2.86, + "learning_rate": "1.4618e-04", + "loss": 0.7698, + "slid_loss": 0.7078, + "step": 2977, + "time": 13.7 + }, + { + "epoch": 2.86, + "learning_rate": "1.4615e-04", + "loss": 0.8024, + "slid_loss": 0.7085, + "step": 2978, + "time": 14.91 + }, + { + "epoch": 2.86, + "learning_rate": "1.4611e-04", + "loss": 0.6839, + "slid_loss": 0.7081, + "step": 2979, + "time": 13.16 + }, + { + "epoch": 2.86, + "learning_rate": "1.4608e-04", + "loss": 0.7217, + "slid_loss": 0.7091, + "step": 2980, + "time": 13.38 + }, + { + "epoch": 2.86, + "learning_rate": "1.4605e-04", + "loss": 0.5628, + 
"slid_loss": 0.7071, + "step": 2981, + "time": 13.81 + }, + { + "epoch": 2.86, + "learning_rate": "1.4601e-04", + "loss": 0.769, + "slid_loss": 0.7083, + "step": 2982, + "time": 11.79 + }, + { + "epoch": 2.87, + "learning_rate": "1.4598e-04", + "loss": 0.6551, + "slid_loss": 0.7076, + "step": 2983, + "time": 11.39 + }, + { + "epoch": 2.87, + "learning_rate": "1.4595e-04", + "loss": 0.6776, + "slid_loss": 0.7077, + "step": 2984, + "time": 13.07 + }, + { + "epoch": 2.87, + "learning_rate": "1.4591e-04", + "loss": 0.6473, + "slid_loss": 0.7073, + "step": 2985, + "time": 13.14 + }, + { + "epoch": 2.87, + "learning_rate": "1.4588e-04", + "loss": 0.7464, + "slid_loss": 0.7085, + "step": 2986, + "time": 10.9 + }, + { + "epoch": 2.87, + "learning_rate": "1.4585e-04", + "loss": 0.6552, + "slid_loss": 0.7074, + "step": 2987, + "time": 13.55 + }, + { + "epoch": 2.87, + "learning_rate": "1.4581e-04", + "loss": 0.6471, + "slid_loss": 0.7069, + "step": 2988, + "time": 14.06 + }, + { + "epoch": 2.87, + "learning_rate": "1.4578e-04", + "loss": 0.6549, + "slid_loss": 0.7061, + "step": 2989, + "time": 11.25 + }, + { + "epoch": 2.87, + "learning_rate": "1.4575e-04", + "loss": 0.682, + "slid_loss": 0.7051, + "step": 2990, + "time": 13.82 + }, + { + "epoch": 2.87, + "learning_rate": "1.4571e-04", + "loss": 0.6921, + "slid_loss": 0.7052, + "step": 2991, + "time": 12.93 + }, + { + "epoch": 2.87, + "learning_rate": "1.4568e-04", + "loss": 0.6534, + "slid_loss": 0.7051, + "step": 2992, + "time": 12.91 + }, + { + "epoch": 2.88, + "learning_rate": "1.4565e-04", + "loss": 0.7625, + "slid_loss": 0.7059, + "step": 2993, + "time": 13.13 + }, + { + "epoch": 2.88, + "learning_rate": "1.4561e-04", + "loss": 0.6895, + "slid_loss": 0.7055, + "step": 2994, + "time": 12.99 + }, + { + "epoch": 2.88, + "learning_rate": "1.4558e-04", + "loss": 0.6825, + "slid_loss": 0.7057, + "step": 2995, + "time": 13.93 + }, + { + "epoch": 2.88, + "learning_rate": "1.4555e-04", + "loss": 0.7275, + "slid_loss": 0.7056, + 
"step": 2996, + "time": 13.66 + }, + { + "epoch": 2.88, + "learning_rate": "1.4551e-04", + "loss": 0.5816, + "slid_loss": 0.7043, + "step": 2997, + "time": 13.23 + }, + { + "epoch": 2.88, + "learning_rate": "1.4548e-04", + "loss": 0.7886, + "slid_loss": 0.7052, + "step": 2998, + "time": 13.51 + }, + { + "epoch": 2.88, + "learning_rate": "1.4545e-04", + "loss": 0.7202, + "slid_loss": 0.7062, + "step": 2999, + "time": 12.89 + }, + { + "epoch": 2.88, + "learning_rate": "1.4541e-04", + "loss": 0.6905, + "slid_loss": 0.7058, + "step": 3000, + "time": 12.32 + }, + { + "epoch": 2.88, + "learning_rate": "1.4538e-04", + "loss": 0.6916, + "slid_loss": 0.7066, + "step": 3001, + "time": 11.26 + }, + { + "epoch": 2.88, + "learning_rate": "1.4535e-04", + "loss": 0.6123, + "slid_loss": 0.7051, + "step": 3002, + "time": 11.38 + }, + { + "epoch": 2.88, + "learning_rate": "1.4531e-04", + "loss": 0.757, + "slid_loss": 0.7057, + "step": 3003, + "time": 14.12 + }, + { + "epoch": 2.89, + "learning_rate": "1.4528e-04", + "loss": 0.7257, + "slid_loss": 0.7051, + "step": 3004, + "time": 13.45 + }, + { + "epoch": 2.89, + "learning_rate": "1.4525e-04", + "loss": 0.6034, + "slid_loss": 0.7043, + "step": 3005, + "time": 12.2 + }, + { + "epoch": 2.89, + "learning_rate": "1.4521e-04", + "loss": 0.623, + "slid_loss": 0.7034, + "step": 3006, + "time": 13.38 + }, + { + "epoch": 2.89, + "learning_rate": "1.4518e-04", + "loss": 0.6806, + "slid_loss": 0.7034, + "step": 3007, + "time": 13.78 + }, + { + "epoch": 2.89, + "learning_rate": "1.4515e-04", + "loss": 0.6505, + "slid_loss": 0.7031, + "step": 3008, + "time": 13.22 + }, + { + "epoch": 2.89, + "learning_rate": "1.4511e-04", + "loss": 0.6953, + "slid_loss": 0.7029, + "step": 3009, + "time": 12.95 + }, + { + "epoch": 2.89, + "learning_rate": "1.4508e-04", + "loss": 0.7343, + "slid_loss": 0.7037, + "step": 3010, + "time": 13.48 + }, + { + "epoch": 2.89, + "learning_rate": "1.4504e-04", + "loss": 0.6928, + "slid_loss": 0.7041, + "step": 3011, + 
"time": 12.96 + }, + { + "epoch": 2.89, + "learning_rate": "1.4501e-04", + "loss": 0.6868, + "slid_loss": 0.7036, + "step": 3012, + "time": 13.7 + }, + { + "epoch": 2.89, + "learning_rate": "1.4498e-04", + "loss": 0.6916, + "slid_loss": 0.7037, + "step": 3013, + "time": 12.93 + }, + { + "epoch": 2.9, + "learning_rate": "1.4494e-04", + "loss": 0.6456, + "slid_loss": 0.7033, + "step": 3014, + "time": 11.38 + }, + { + "epoch": 2.9, + "learning_rate": "1.4491e-04", + "loss": 0.7417, + "slid_loss": 0.7035, + "step": 3015, + "time": 13.34 + }, + { + "epoch": 2.9, + "learning_rate": "1.4488e-04", + "loss": 0.7384, + "slid_loss": 0.7046, + "step": 3016, + "time": 11.85 + }, + { + "epoch": 2.9, + "learning_rate": "1.4484e-04", + "loss": 0.6083, + "slid_loss": 0.7038, + "step": 3017, + "time": 13.66 + }, + { + "epoch": 2.9, + "learning_rate": "1.4481e-04", + "loss": 0.6081, + "slid_loss": 0.7029, + "step": 3018, + "time": 13.5 + }, + { + "epoch": 2.9, + "learning_rate": "1.4478e-04", + "loss": 0.6343, + "slid_loss": 0.7025, + "step": 3019, + "time": 12.91 + }, + { + "epoch": 2.9, + "learning_rate": "1.4474e-04", + "loss": 0.7031, + "slid_loss": 0.701, + "step": 3020, + "time": 13.83 + }, + { + "epoch": 2.9, + "learning_rate": "1.4471e-04", + "loss": 0.679, + "slid_loss": 0.701, + "step": 3021, + "time": 13.6 + }, + { + "epoch": 2.9, + "learning_rate": "1.4468e-04", + "loss": 0.6341, + "slid_loss": 0.6999, + "step": 3022, + "time": 11.29 + }, + { + "epoch": 2.9, + "learning_rate": "1.4464e-04", + "loss": 0.6778, + "slid_loss": 0.6988, + "step": 3023, + "time": 14.48 + }, + { + "epoch": 2.9, + "learning_rate": "1.4461e-04", + "loss": 0.7505, + "slid_loss": 0.7001, + "step": 3024, + "time": 13.54 + }, + { + "epoch": 2.91, + "learning_rate": "1.4458e-04", + "loss": 0.5979, + "slid_loss": 0.6991, + "step": 3025, + "time": 14.06 + }, + { + "epoch": 2.91, + "learning_rate": "1.4454e-04", + "loss": 0.6766, + "slid_loss": 0.6986, + "step": 3026, + "time": 13.25 + }, + { + "epoch": 
2.91, + "learning_rate": "1.4451e-04", + "loss": 0.5747, + "slid_loss": 0.6976, + "step": 3027, + "time": 10.87 + }, + { + "epoch": 2.91, + "learning_rate": "1.4448e-04", + "loss": 0.7425, + "slid_loss": 0.6978, + "step": 3028, + "time": 14.0 + }, + { + "epoch": 2.91, + "learning_rate": "1.4444e-04", + "loss": 0.7362, + "slid_loss": 0.6979, + "step": 3029, + "time": 12.77 + }, + { + "epoch": 2.91, + "learning_rate": "1.4441e-04", + "loss": 0.7399, + "slid_loss": 0.6981, + "step": 3030, + "time": 13.34 + }, + { + "epoch": 2.91, + "learning_rate": "1.4438e-04", + "loss": 0.7381, + "slid_loss": 0.6987, + "step": 3031, + "time": 11.0 + }, + { + "epoch": 2.91, + "learning_rate": "1.4434e-04", + "loss": 0.6595, + "slid_loss": 0.6974, + "step": 3032, + "time": 11.44 + }, + { + "epoch": 2.91, + "learning_rate": "1.4431e-04", + "loss": 0.6314, + "slid_loss": 0.6963, + "step": 3033, + "time": 13.45 + }, + { + "epoch": 2.91, + "learning_rate": "1.4428e-04", + "loss": 0.6767, + "slid_loss": 0.6972, + "step": 3034, + "time": 11.82 + }, + { + "epoch": 2.92, + "learning_rate": "1.4424e-04", + "loss": 0.7467, + "slid_loss": 0.6972, + "step": 3035, + "time": 11.56 + }, + { + "epoch": 2.92, + "learning_rate": "1.4421e-04", + "loss": 0.5999, + "slid_loss": 0.6959, + "step": 3036, + "time": 13.23 + }, + { + "epoch": 2.92, + "learning_rate": "1.4418e-04", + "loss": 0.6967, + "slid_loss": 0.6959, + "step": 3037, + "time": 14.18 + }, + { + "epoch": 2.92, + "learning_rate": "1.4414e-04", + "loss": 0.6797, + "slid_loss": 0.6959, + "step": 3038, + "time": 14.02 + }, + { + "epoch": 2.92, + "learning_rate": "1.4411e-04", + "loss": 0.6724, + "slid_loss": 0.6957, + "step": 3039, + "time": 14.16 + }, + { + "epoch": 2.92, + "learning_rate": "1.4408e-04", + "loss": 0.7039, + "slid_loss": 0.6969, + "step": 3040, + "time": 13.52 + }, + { + "epoch": 2.92, + "learning_rate": "1.4404e-04", + "loss": 0.6728, + "slid_loss": 0.6963, + "step": 3041, + "time": 13.23 + }, + { + "epoch": 2.92, + 
"learning_rate": "1.4401e-04", + "loss": 0.7397, + "slid_loss": 0.6967, + "step": 3042, + "time": 11.58 + }, + { + "epoch": 2.92, + "learning_rate": "1.4398e-04", + "loss": 0.716, + "slid_loss": 0.6968, + "step": 3043, + "time": 13.87 + }, + { + "epoch": 2.92, + "learning_rate": "1.4394e-04", + "loss": 0.6447, + "slid_loss": 0.6966, + "step": 3044, + "time": 11.98 + }, + { + "epoch": 2.93, + "learning_rate": "1.4391e-04", + "loss": 0.7546, + "slid_loss": 0.6966, + "step": 3045, + "time": 12.23 + }, + { + "epoch": 2.93, + "learning_rate": "1.4388e-04", + "loss": 0.6668, + "slid_loss": 0.6969, + "step": 3046, + "time": 13.85 + }, + { + "epoch": 2.93, + "learning_rate": "1.4385e-04", + "loss": 0.7693, + "slid_loss": 0.6979, + "step": 3047, + "time": 13.92 + }, + { + "epoch": 2.93, + "learning_rate": "1.4381e-04", + "loss": 0.7233, + "slid_loss": 0.6977, + "step": 3048, + "time": 13.87 + }, + { + "epoch": 2.93, + "learning_rate": "1.4378e-04", + "loss": 0.693, + "slid_loss": 0.6981, + "step": 3049, + "time": 12.82 + }, + { + "epoch": 2.93, + "learning_rate": "1.4375e-04", + "loss": 0.6997, + "slid_loss": 0.6976, + "step": 3050, + "time": 13.49 + }, + { + "epoch": 2.93, + "learning_rate": "1.4371e-04", + "loss": 0.7083, + "slid_loss": 0.6969, + "step": 3051, + "time": 13.55 + }, + { + "epoch": 2.93, + "learning_rate": "1.4368e-04", + "loss": 0.5744, + "slid_loss": 0.6962, + "step": 3052, + "time": 14.13 + }, + { + "epoch": 2.93, + "learning_rate": "1.4365e-04", + "loss": 0.6806, + "slid_loss": 0.6961, + "step": 3053, + "time": 11.09 + }, + { + "epoch": 2.93, + "learning_rate": "1.4361e-04", + "loss": 0.6486, + "slid_loss": 0.6957, + "step": 3054, + "time": 13.72 + }, + { + "epoch": 2.93, + "learning_rate": "1.4358e-04", + "loss": 0.7679, + "slid_loss": 0.6957, + "step": 3055, + "time": 13.11 + }, + { + "epoch": 2.94, + "learning_rate": "1.4355e-04", + "loss": 0.7074, + "slid_loss": 0.6953, + "step": 3056, + "time": 13.55 + }, + { + "epoch": 2.94, + "learning_rate": 
"1.4351e-04", + "loss": 0.7615, + "slid_loss": 0.6955, + "step": 3057, + "time": 13.86 + }, + { + "epoch": 2.94, + "learning_rate": "1.4348e-04", + "loss": 0.707, + "slid_loss": 0.6952, + "step": 3058, + "time": 12.85 + }, + { + "epoch": 2.94, + "learning_rate": "1.4345e-04", + "loss": 0.7509, + "slid_loss": 0.6956, + "step": 3059, + "time": 13.2 + }, + { + "epoch": 2.94, + "learning_rate": "1.4341e-04", + "loss": 0.6994, + "slid_loss": 0.6949, + "step": 3060, + "time": 13.43 + }, + { + "epoch": 2.94, + "learning_rate": "1.4338e-04", + "loss": 0.7304, + "slid_loss": 0.6951, + "step": 3061, + "time": 10.99 + }, + { + "epoch": 2.94, + "learning_rate": "1.4335e-04", + "loss": 0.5641, + "slid_loss": 0.693, + "step": 3062, + "time": 12.89 + }, + { + "epoch": 2.94, + "learning_rate": "1.4331e-04", + "loss": 0.689, + "slid_loss": 0.6928, + "step": 3063, + "time": 11.85 + }, + { + "epoch": 2.94, + "learning_rate": "1.4328e-04", + "loss": 0.7601, + "slid_loss": 0.6929, + "step": 3064, + "time": 11.1 + }, + { + "epoch": 2.94, + "learning_rate": "1.4325e-04", + "loss": 0.6955, + "slid_loss": 0.6923, + "step": 3065, + "time": 14.0 + }, + { + "epoch": 2.95, + "learning_rate": "1.4321e-04", + "loss": 0.6926, + "slid_loss": 0.6929, + "step": 3066, + "time": 11.91 + }, + { + "epoch": 2.95, + "learning_rate": "1.4318e-04", + "loss": 0.7107, + "slid_loss": 0.6923, + "step": 3067, + "time": 13.84 + }, + { + "epoch": 2.95, + "learning_rate": "1.4315e-04", + "loss": 0.6372, + "slid_loss": 0.6908, + "step": 3068, + "time": 12.22 + }, + { + "epoch": 2.95, + "learning_rate": "1.4311e-04", + "loss": 0.6726, + "slid_loss": 0.6897, + "step": 3069, + "time": 12.02 + }, + { + "epoch": 2.95, + "learning_rate": "1.4308e-04", + "loss": 0.7133, + "slid_loss": 0.6891, + "step": 3070, + "time": 13.05 + }, + { + "epoch": 2.95, + "learning_rate": "1.4305e-04", + "loss": 0.6641, + "slid_loss": 0.6892, + "step": 3071, + "time": 11.36 + }, + { + "epoch": 2.95, + "learning_rate": "1.4301e-04", + "loss": 
0.6922, + "slid_loss": 0.6891, + "step": 3072, + "time": 12.07 + }, + { + "epoch": 2.95, + "learning_rate": "1.4298e-04", + "loss": 0.6419, + "slid_loss": 0.6878, + "step": 3073, + "time": 13.57 + }, + { + "epoch": 2.95, + "learning_rate": "1.4295e-04", + "loss": 0.6794, + "slid_loss": 0.6881, + "step": 3074, + "time": 12.87 + }, + { + "epoch": 2.95, + "learning_rate": "1.4291e-04", + "loss": 0.6922, + "slid_loss": 0.6876, + "step": 3075, + "time": 12.87 + }, + { + "epoch": 2.95, + "learning_rate": "1.4288e-04", + "loss": 0.7283, + "slid_loss": 0.6885, + "step": 3076, + "time": 13.35 + }, + { + "epoch": 2.96, + "learning_rate": "1.4285e-04", + "loss": 0.776, + "slid_loss": 0.6886, + "step": 3077, + "time": 13.71 + }, + { + "epoch": 2.96, + "learning_rate": "1.4281e-04", + "loss": 0.7573, + "slid_loss": 0.6881, + "step": 3078, + "time": 13.58 + }, + { + "epoch": 2.96, + "learning_rate": "1.4278e-04", + "loss": 0.7384, + "slid_loss": 0.6887, + "step": 3079, + "time": 13.56 + }, + { + "epoch": 2.96, + "learning_rate": "1.4275e-04", + "loss": 0.6639, + "slid_loss": 0.6881, + "step": 3080, + "time": 13.4 + }, + { + "epoch": 2.96, + "learning_rate": "1.4272e-04", + "loss": 0.7073, + "slid_loss": 0.6895, + "step": 3081, + "time": 13.89 + }, + { + "epoch": 2.96, + "learning_rate": "1.4268e-04", + "loss": 0.7512, + "slid_loss": 0.6894, + "step": 3082, + "time": 13.0 + }, + { + "epoch": 2.96, + "learning_rate": "1.4265e-04", + "loss": 0.7828, + "slid_loss": 0.6906, + "step": 3083, + "time": 13.89 + }, + { + "epoch": 2.96, + "learning_rate": "1.4262e-04", + "loss": 0.7727, + "slid_loss": 0.6916, + "step": 3084, + "time": 13.46 + }, + { + "epoch": 2.96, + "learning_rate": "1.4258e-04", + "loss": 0.5772, + "slid_loss": 0.6909, + "step": 3085, + "time": 13.2 + }, + { + "epoch": 2.96, + "learning_rate": "1.4255e-04", + "loss": 0.7195, + "slid_loss": 0.6906, + "step": 3086, + "time": 13.68 + }, + { + "epoch": 2.97, + "learning_rate": "1.4252e-04", + "loss": 0.6892, + "slid_loss": 
0.691, + "step": 3087, + "time": 13.31 + }, + { + "epoch": 2.97, + "learning_rate": "1.4248e-04", + "loss": 0.6903, + "slid_loss": 0.6914, + "step": 3088, + "time": 12.95 + }, + { + "epoch": 2.97, + "learning_rate": "1.4245e-04", + "loss": 0.7271, + "slid_loss": 0.6921, + "step": 3089, + "time": 13.19 + }, + { + "epoch": 2.97, + "learning_rate": "1.4242e-04", + "loss": 0.6685, + "slid_loss": 0.692, + "step": 3090, + "time": 13.24 + }, + { + "epoch": 2.97, + "learning_rate": "1.4238e-04", + "loss": 0.7246, + "slid_loss": 0.6923, + "step": 3091, + "time": 13.44 + }, + { + "epoch": 2.97, + "learning_rate": "1.4235e-04", + "loss": 0.633, + "slid_loss": 0.6921, + "step": 3092, + "time": 11.55 + }, + { + "epoch": 2.97, + "learning_rate": "1.4232e-04", + "loss": 0.7313, + "slid_loss": 0.6918, + "step": 3093, + "time": 13.69 + }, + { + "epoch": 2.97, + "learning_rate": "1.4228e-04", + "loss": 0.7078, + "slid_loss": 0.692, + "step": 3094, + "time": 13.64 + }, + { + "epoch": 2.97, + "learning_rate": "1.4225e-04", + "loss": 0.7529, + "slid_loss": 0.6927, + "step": 3095, + "time": 13.82 + }, + { + "epoch": 2.97, + "learning_rate": "1.4222e-04", + "loss": 0.7222, + "slid_loss": 0.6926, + "step": 3096, + "time": 12.13 + }, + { + "epoch": 2.98, + "learning_rate": "1.4218e-04", + "loss": 0.6911, + "slid_loss": 0.6937, + "step": 3097, + "time": 13.81 + }, + { + "epoch": 2.98, + "learning_rate": "1.4215e-04", + "loss": 0.6755, + "slid_loss": 0.6926, + "step": 3098, + "time": 11.55 + }, + { + "epoch": 2.98, + "learning_rate": "1.4212e-04", + "loss": 0.6705, + "slid_loss": 0.6921, + "step": 3099, + "time": 11.41 + }, + { + "epoch": 2.98, + "learning_rate": "1.4209e-04", + "loss": 0.5421, + "slid_loss": 0.6906, + "step": 3100, + "time": 13.28 + }, + { + "epoch": 2.98, + "learning_rate": "1.4205e-04", + "loss": 0.7454, + "slid_loss": 0.6911, + "step": 3101, + "time": 13.42 + }, + { + "epoch": 2.98, + "learning_rate": "1.4202e-04", + "loss": 0.7021, + "slid_loss": 0.692, + "step": 3102, 
+ "time": 12.71 + }, + { + "epoch": 2.98, + "learning_rate": "1.4199e-04", + "loss": 0.6789, + "slid_loss": 0.6913, + "step": 3103, + "time": 13.72 + }, + { + "epoch": 2.98, + "learning_rate": "1.4195e-04", + "loss": 0.6407, + "slid_loss": 0.6904, + "step": 3104, + "time": 13.47 + }, + { + "epoch": 2.98, + "learning_rate": "1.4192e-04", + "loss": 0.6743, + "slid_loss": 0.6911, + "step": 3105, + "time": 14.58 + }, + { + "epoch": 2.98, + "learning_rate": "1.4189e-04", + "loss": 0.8056, + "slid_loss": 0.6929, + "step": 3106, + "time": 12.96 + }, + { + "epoch": 2.98, + "learning_rate": "1.4185e-04", + "loss": 0.7951, + "slid_loss": 0.6941, + "step": 3107, + "time": 14.03 + }, + { + "epoch": 2.99, + "learning_rate": "1.4182e-04", + "loss": 0.766, + "slid_loss": 0.6952, + "step": 3108, + "time": 12.06 + }, + { + "epoch": 2.99, + "learning_rate": "1.4179e-04", + "loss": 0.7617, + "slid_loss": 0.6959, + "step": 3109, + "time": 12.63 + }, + { + "epoch": 2.99, + "learning_rate": "1.4175e-04", + "loss": 0.6083, + "slid_loss": 0.6947, + "step": 3110, + "time": 12.61 + }, + { + "epoch": 2.99, + "learning_rate": "1.4172e-04", + "loss": 0.6002, + "slid_loss": 0.6937, + "step": 3111, + "time": 13.57 + }, + { + "epoch": 2.99, + "learning_rate": "1.4169e-04", + "loss": 0.7745, + "slid_loss": 0.6946, + "step": 3112, + "time": 14.02 + }, + { + "epoch": 2.99, + "learning_rate": "1.4166e-04", + "loss": 0.7196, + "slid_loss": 0.6949, + "step": 3113, + "time": 13.4 + }, + { + "epoch": 2.99, + "learning_rate": "1.4162e-04", + "loss": 0.6583, + "slid_loss": 0.695, + "step": 3114, + "time": 13.04 + }, + { + "epoch": 2.99, + "learning_rate": "1.4159e-04", + "loss": 0.7411, + "slid_loss": 0.695, + "step": 3115, + "time": 11.5 + }, + { + "epoch": 2.99, + "learning_rate": "1.4156e-04", + "loss": 0.6555, + "slid_loss": 0.6942, + "step": 3116, + "time": 13.48 + }, + { + "epoch": 2.99, + "learning_rate": "1.4152e-04", + "loss": 0.7387, + "slid_loss": 0.6955, + "step": 3117, + "time": 12.82 + }, + { 
+ "epoch": 3.0, + "learning_rate": "1.4149e-04", + "loss": 0.6716, + "slid_loss": 0.6961, + "step": 3118, + "time": 13.52 + }, + { + "epoch": 3.0, + "learning_rate": "1.4146e-04", + "loss": 0.7415, + "slid_loss": 0.6972, + "step": 3119, + "time": 13.47 + }, + { + "epoch": 3.0, + "learning_rate": "1.4142e-04", + "loss": 0.751, + "slid_loss": 0.6977, + "step": 3120, + "time": 13.53 + }, + { + "epoch": 3.0, + "learning_rate": "1.4139e-04", + "loss": 0.7142, + "slid_loss": 0.698, + "step": 3121, + "time": 12.21 + }, + { + "epoch": 3.0, + "learning_rate": "1.4136e-04", + "loss": 0.6907, + "slid_loss": 0.6986, + "step": 3122, + "time": 13.03 + }, + { + "epoch": 3.0, + "learning_rate": "1.4132e-04", + "loss": 0.7822, + "slid_loss": 0.6996, + "step": 3123, + "time": 12.19 + }, + { + "epoch": 3.0, + "learning_rate": "1.4129e-04", + "loss": 0.7672, + "slid_loss": 0.6998, + "step": 3124, + "time": 168.7 + }, + { + "epoch": 3.0, + "learning_rate": "1.4126e-04", + "loss": 0.7612, + "slid_loss": 0.7014, + "step": 3125, + "time": 12.93 + }, + { + "epoch": 3.0, + "learning_rate": "1.4123e-04", + "loss": 0.7416, + "slid_loss": 0.7021, + "step": 3126, + "time": 11.74 + }, + { + "epoch": 3.0, + "learning_rate": "1.4119e-04", + "loss": 0.7642, + "slid_loss": 0.704, + "step": 3127, + "time": 13.39 + }, + { + "epoch": 3.0, + "learning_rate": "1.4116e-04", + "loss": 0.8066, + "slid_loss": 0.7046, + "step": 3128, + "time": 11.75 + }, + { + "epoch": 3.01, + "learning_rate": "1.4113e-04", + "loss": 0.6525, + "slid_loss": 0.7038, + "step": 3129, + "time": 14.44 + }, + { + "epoch": 3.01, + "learning_rate": "1.4109e-04", + "loss": 0.7685, + "slid_loss": 0.7041, + "step": 3130, + "time": 13.63 + }, + { + "epoch": 3.01, + "learning_rate": "1.4106e-04", + "loss": 0.7013, + "slid_loss": 0.7037, + "step": 3131, + "time": 12.28 + }, + { + "epoch": 3.01, + "learning_rate": "1.4103e-04", + "loss": 0.748, + "slid_loss": 0.7046, + "step": 3132, + "time": 13.54 + }, + { + "epoch": 3.01, + 
"learning_rate": "1.4099e-04", + "loss": 0.7398, + "slid_loss": 0.7057, + "step": 3133, + "time": 13.65 + }, + { + "epoch": 3.01, + "learning_rate": "1.4096e-04", + "loss": 0.7436, + "slid_loss": 0.7063, + "step": 3134, + "time": 12.95 + }, + { + "epoch": 3.01, + "learning_rate": "1.4093e-04", + "loss": 0.5862, + "slid_loss": 0.7047, + "step": 3135, + "time": 12.98 + }, + { + "epoch": 3.01, + "learning_rate": "1.4090e-04", + "loss": 0.7214, + "slid_loss": 0.7059, + "step": 3136, + "time": 13.29 + }, + { + "epoch": 3.01, + "learning_rate": "1.4086e-04", + "loss": 0.566, + "slid_loss": 0.7046, + "step": 3137, + "time": 13.83 + }, + { + "epoch": 3.01, + "learning_rate": "1.4083e-04", + "loss": 0.6743, + "slid_loss": 0.7046, + "step": 3138, + "time": 12.35 + }, + { + "epoch": 3.02, + "learning_rate": "1.4080e-04", + "loss": 0.709, + "slid_loss": 0.7049, + "step": 3139, + "time": 13.21 + }, + { + "epoch": 3.02, + "learning_rate": "1.4076e-04", + "loss": 0.6561, + "slid_loss": 0.7045, + "step": 3140, + "time": 12.22 + }, + { + "epoch": 3.02, + "learning_rate": "1.4073e-04", + "loss": 0.7308, + "slid_loss": 0.705, + "step": 3141, + "time": 11.66 + }, + { + "epoch": 3.02, + "learning_rate": "1.4070e-04", + "loss": 0.7066, + "slid_loss": 0.7047, + "step": 3142, + "time": 11.74 + }, + { + "epoch": 3.02, + "learning_rate": "1.4067e-04", + "loss": 0.695, + "slid_loss": 0.7045, + "step": 3143, + "time": 11.9 + }, + { + "epoch": 3.02, + "learning_rate": "1.4063e-04", + "loss": 0.613, + "slid_loss": 0.7042, + "step": 3144, + "time": 13.97 + }, + { + "epoch": 3.02, + "learning_rate": "1.4060e-04", + "loss": 0.6559, + "slid_loss": 0.7032, + "step": 3145, + "time": 13.15 + }, + { + "epoch": 3.02, + "learning_rate": "1.4057e-04", + "loss": 0.7764, + "slid_loss": 0.7043, + "step": 3146, + "time": 13.39 + }, + { + "epoch": 3.02, + "learning_rate": "1.4053e-04", + "loss": 0.732, + "slid_loss": 0.7039, + "step": 3147, + "time": 13.3 + }, + { + "epoch": 3.02, + "learning_rate": 
"1.4050e-04", + "loss": 0.7285, + "slid_loss": 0.704, + "step": 3148, + "time": 12.72 + }, + { + "epoch": 3.02, + "learning_rate": "1.4047e-04", + "loss": 0.8375, + "slid_loss": 0.7054, + "step": 3149, + "time": 13.47 + }, + { + "epoch": 3.03, + "learning_rate": "1.4043e-04", + "loss": 0.6455, + "slid_loss": 0.7049, + "step": 3150, + "time": 13.65 + }, + { + "epoch": 3.03, + "learning_rate": "1.4040e-04", + "loss": 0.8021, + "slid_loss": 0.7058, + "step": 3151, + "time": 13.55 + }, + { + "epoch": 3.03, + "learning_rate": "1.4037e-04", + "loss": 0.722, + "slid_loss": 0.7073, + "step": 3152, + "time": 11.22 + }, + { + "epoch": 3.03, + "learning_rate": "1.4034e-04", + "loss": 0.5594, + "slid_loss": 0.7061, + "step": 3153, + "time": 13.23 + }, + { + "epoch": 3.03, + "learning_rate": "1.4030e-04", + "loss": 0.6474, + "slid_loss": 0.7061, + "step": 3154, + "time": 12.02 + }, + { + "epoch": 3.03, + "learning_rate": "1.4027e-04", + "loss": 0.6189, + "slid_loss": 0.7046, + "step": 3155, + "time": 12.9 + }, + { + "epoch": 3.03, + "learning_rate": "1.4024e-04", + "loss": 0.6948, + "slid_loss": 0.7045, + "step": 3156, + "time": 13.25 + }, + { + "epoch": 3.03, + "learning_rate": "1.4020e-04", + "loss": 0.7048, + "slid_loss": 0.7039, + "step": 3157, + "time": 11.85 + }, + { + "epoch": 3.03, + "learning_rate": "1.4017e-04", + "loss": 0.6745, + "slid_loss": 0.7036, + "step": 3158, + "time": 14.11 + }, + { + "epoch": 3.03, + "learning_rate": "1.4014e-04", + "loss": 0.7211, + "slid_loss": 0.7033, + "step": 3159, + "time": 12.43 + }, + { + "epoch": 3.04, + "learning_rate": "1.4011e-04", + "loss": 0.7262, + "slid_loss": 0.7035, + "step": 3160, + "time": 13.26 + }, + { + "epoch": 3.04, + "learning_rate": "1.4007e-04", + "loss": 0.6036, + "slid_loss": 0.7023, + "step": 3161, + "time": 13.36 + }, + { + "epoch": 3.04, + "learning_rate": "1.4004e-04", + "loss": 0.6643, + "slid_loss": 0.7033, + "step": 3162, + "time": 13.19 + }, + { + "epoch": 3.04, + "learning_rate": "1.4001e-04", + 
"loss": 0.7439, + "slid_loss": 0.7038, + "step": 3163, + "time": 11.6 + }, + { + "epoch": 3.04, + "learning_rate": "1.3997e-04", + "loss": 0.8062, + "slid_loss": 0.7043, + "step": 3164, + "time": 10.98 + }, + { + "epoch": 3.04, + "learning_rate": "1.3994e-04", + "loss": 0.7249, + "slid_loss": 0.7046, + "step": 3165, + "time": 13.07 + }, + { + "epoch": 3.04, + "learning_rate": "1.3991e-04", + "loss": 0.6062, + "slid_loss": 0.7037, + "step": 3166, + "time": 13.65 + }, + { + "epoch": 3.04, + "learning_rate": "1.3988e-04", + "loss": 0.6054, + "slid_loss": 0.7027, + "step": 3167, + "time": 13.85 + }, + { + "epoch": 3.04, + "learning_rate": "1.3984e-04", + "loss": 0.6388, + "slid_loss": 0.7027, + "step": 3168, + "time": 11.46 + }, + { + "epoch": 3.04, + "learning_rate": "1.3981e-04", + "loss": 0.8312, + "slid_loss": 0.7043, + "step": 3169, + "time": 13.35 + }, + { + "epoch": 3.05, + "learning_rate": "1.3978e-04", + "loss": 0.6952, + "slid_loss": 0.7041, + "step": 3170, + "time": 12.01 + }, + { + "epoch": 3.05, + "learning_rate": "1.3974e-04", + "loss": 0.715, + "slid_loss": 0.7046, + "step": 3171, + "time": 14.03 + }, + { + "epoch": 3.05, + "learning_rate": "1.3971e-04", + "loss": 0.6922, + "slid_loss": 0.7046, + "step": 3172, + "time": 13.17 + }, + { + "epoch": 3.05, + "learning_rate": "1.3968e-04", + "loss": 0.7078, + "slid_loss": 0.7052, + "step": 3173, + "time": 13.28 + }, + { + "epoch": 3.05, + "learning_rate": "1.3965e-04", + "loss": 0.66, + "slid_loss": 0.7051, + "step": 3174, + "time": 14.23 + }, + { + "epoch": 3.05, + "learning_rate": "1.3961e-04", + "loss": 0.7304, + "slid_loss": 0.7054, + "step": 3175, + "time": 12.97 + }, + { + "epoch": 3.05, + "learning_rate": "1.3958e-04", + "loss": 0.6139, + "slid_loss": 0.7043, + "step": 3176, + "time": 13.51 + }, + { + "epoch": 3.05, + "learning_rate": "1.3955e-04", + "loss": 0.732, + "slid_loss": 0.7038, + "step": 3177, + "time": 13.54 + }, + { + "epoch": 3.05, + "learning_rate": "1.3951e-04", + "loss": 0.6709, + 
"slid_loss": 0.703, + "step": 3178, + "time": 12.19 + }, + { + "epoch": 3.05, + "learning_rate": "1.3948e-04", + "loss": 0.6941, + "slid_loss": 0.7025, + "step": 3179, + "time": 13.19 + }, + { + "epoch": 3.05, + "learning_rate": "1.3945e-04", + "loss": 0.6646, + "slid_loss": 0.7025, + "step": 3180, + "time": 12.83 + }, + { + "epoch": 3.06, + "learning_rate": "1.3942e-04", + "loss": 0.6924, + "slid_loss": 0.7024, + "step": 3181, + "time": 11.1 + }, + { + "epoch": 3.06, + "learning_rate": "1.3938e-04", + "loss": 0.6183, + "slid_loss": 0.7011, + "step": 3182, + "time": 13.67 + }, + { + "epoch": 3.06, + "learning_rate": "1.3935e-04", + "loss": 0.7008, + "slid_loss": 0.7003, + "step": 3183, + "time": 13.51 + }, + { + "epoch": 3.06, + "learning_rate": "1.3932e-04", + "loss": 0.6288, + "slid_loss": 0.6988, + "step": 3184, + "time": 12.18 + }, + { + "epoch": 3.06, + "learning_rate": "1.3929e-04", + "loss": 0.7464, + "slid_loss": 0.7005, + "step": 3185, + "time": 11.66 + }, + { + "epoch": 3.06, + "learning_rate": "1.3925e-04", + "loss": 0.8301, + "slid_loss": 0.7016, + "step": 3186, + "time": 12.93 + }, + { + "epoch": 3.06, + "learning_rate": "1.3922e-04", + "loss": 0.6097, + "slid_loss": 0.7008, + "step": 3187, + "time": 13.6 + }, + { + "epoch": 3.06, + "learning_rate": "1.3919e-04", + "loss": 0.744, + "slid_loss": 0.7014, + "step": 3188, + "time": 12.87 + }, + { + "epoch": 3.06, + "learning_rate": "1.3915e-04", + "loss": 0.676, + "slid_loss": 0.7008, + "step": 3189, + "time": 13.48 + }, + { + "epoch": 3.06, + "learning_rate": "1.3912e-04", + "loss": 0.6278, + "slid_loss": 0.7004, + "step": 3190, + "time": 13.23 + }, + { + "epoch": 3.07, + "learning_rate": "1.3909e-04", + "loss": 0.7287, + "slid_loss": 0.7005, + "step": 3191, + "time": 13.26 + }, + { + "epoch": 3.07, + "learning_rate": "1.3906e-04", + "loss": 0.6362, + "slid_loss": 0.7005, + "step": 3192, + "time": 13.71 + }, + { + "epoch": 3.07, + "learning_rate": "1.3902e-04", + "loss": 0.6782, + "slid_loss": 0.7, + 
"step": 3193, + "time": 10.92 + }, + { + "epoch": 3.07, + "learning_rate": "1.3899e-04", + "loss": 0.644, + "slid_loss": 0.6993, + "step": 3194, + "time": 13.49 + }, + { + "epoch": 3.07, + "learning_rate": "1.3896e-04", + "loss": 0.8051, + "slid_loss": 0.6999, + "step": 3195, + "time": 14.14 + }, + { + "epoch": 3.07, + "learning_rate": "1.3893e-04", + "loss": 0.6883, + "slid_loss": 0.6995, + "step": 3196, + "time": 13.72 + }, + { + "epoch": 3.07, + "learning_rate": "1.3889e-04", + "loss": 0.6589, + "slid_loss": 0.6992, + "step": 3197, + "time": 13.43 + }, + { + "epoch": 3.07, + "learning_rate": "1.3886e-04", + "loss": 0.6556, + "slid_loss": 0.699, + "step": 3198, + "time": 13.38 + }, + { + "epoch": 3.07, + "learning_rate": "1.3883e-04", + "loss": 0.6386, + "slid_loss": 0.6987, + "step": 3199, + "time": 13.37 + }, + { + "epoch": 3.07, + "learning_rate": "1.3879e-04", + "loss": 0.7216, + "slid_loss": 0.7005, + "step": 3200, + "time": 13.95 + }, + { + "epoch": 3.07, + "learning_rate": "1.3876e-04", + "loss": 0.7405, + "slid_loss": 0.7004, + "step": 3201, + "time": 11.28 + }, + { + "epoch": 3.08, + "learning_rate": "1.3873e-04", + "loss": 0.6524, + "slid_loss": 0.6999, + "step": 3202, + "time": 13.25 + }, + { + "epoch": 3.08, + "learning_rate": "1.3870e-04", + "loss": 0.756, + "slid_loss": 0.7007, + "step": 3203, + "time": 12.21 + }, + { + "epoch": 3.08, + "learning_rate": "1.3866e-04", + "loss": 0.6193, + "slid_loss": 0.7005, + "step": 3204, + "time": 13.51 + }, + { + "epoch": 3.08, + "learning_rate": "1.3863e-04", + "loss": 0.7209, + "slid_loss": 0.701, + "step": 3205, + "time": 12.93 + }, + { + "epoch": 3.08, + "learning_rate": "1.3860e-04", + "loss": 0.6373, + "slid_loss": 0.6993, + "step": 3206, + "time": 13.42 + }, + { + "epoch": 3.08, + "learning_rate": "1.3857e-04", + "loss": 0.7251, + "slid_loss": 0.6986, + "step": 3207, + "time": 14.08 + }, + { + "epoch": 3.08, + "learning_rate": "1.3853e-04", + "loss": 0.6462, + "slid_loss": 0.6974, + "step": 3208, + "time": 
12.82 + }, + { + "epoch": 3.08, + "learning_rate": "1.3850e-04", + "loss": 0.5842, + "slid_loss": 0.6956, + "step": 3209, + "time": 13.53 + }, + { + "epoch": 3.08, + "learning_rate": "1.3847e-04", + "loss": 0.6969, + "slid_loss": 0.6965, + "step": 3210, + "time": 12.56 + }, + { + "epoch": 3.08, + "learning_rate": "1.3844e-04", + "loss": 0.5923, + "slid_loss": 0.6964, + "step": 3211, + "time": 13.91 + }, + { + "epoch": 3.09, + "learning_rate": "1.3840e-04", + "loss": 0.7121, + "slid_loss": 0.6958, + "step": 3212, + "time": 11.85 + }, + { + "epoch": 3.09, + "learning_rate": "1.3837e-04", + "loss": 0.7137, + "slid_loss": 0.6957, + "step": 3213, + "time": 12.96 + }, + { + "epoch": 3.09, + "learning_rate": "1.3834e-04", + "loss": 0.6624, + "slid_loss": 0.6958, + "step": 3214, + "time": 13.06 + }, + { + "epoch": 3.09, + "learning_rate": "1.3830e-04", + "loss": 0.8034, + "slid_loss": 0.6964, + "step": 3215, + "time": 13.66 + }, + { + "epoch": 3.09, + "learning_rate": "1.3827e-04", + "loss": 0.7099, + "slid_loss": 0.6969, + "step": 3216, + "time": 14.02 + }, + { + "epoch": 3.09, + "learning_rate": "1.3824e-04", + "loss": 0.6684, + "slid_loss": 0.6962, + "step": 3217, + "time": 13.78 + }, + { + "epoch": 3.09, + "learning_rate": "1.3821e-04", + "loss": 0.674, + "slid_loss": 0.6962, + "step": 3218, + "time": 13.77 + }, + { + "epoch": 3.09, + "learning_rate": "1.3817e-04", + "loss": 0.8261, + "slid_loss": 0.6971, + "step": 3219, + "time": 13.55 + }, + { + "epoch": 3.09, + "learning_rate": "1.3814e-04", + "loss": 0.6781, + "slid_loss": 0.6964, + "step": 3220, + "time": 13.93 + }, + { + "epoch": 3.09, + "learning_rate": "1.3811e-04", + "loss": 0.7043, + "slid_loss": 0.6963, + "step": 3221, + "time": 13.58 + }, + { + "epoch": 3.1, + "learning_rate": "1.3808e-04", + "loss": 0.6437, + "slid_loss": 0.6958, + "step": 3222, + "time": 12.67 + }, + { + "epoch": 3.1, + "learning_rate": "1.3804e-04", + "loss": 0.6106, + "slid_loss": 0.6941, + "step": 3223, + "time": 13.58 + }, + { + 
"epoch": 3.1, + "learning_rate": "1.3801e-04", + "loss": 0.7133, + "slid_loss": 0.6935, + "step": 3224, + "time": 13.35 + }, + { + "epoch": 3.1, + "learning_rate": "1.3798e-04", + "loss": 0.5977, + "slid_loss": 0.6919, + "step": 3225, + "time": 11.18 + }, + { + "epoch": 3.1, + "learning_rate": "1.3795e-04", + "loss": 0.6391, + "slid_loss": 0.6909, + "step": 3226, + "time": 11.53 + }, + { + "epoch": 3.1, + "learning_rate": "1.3791e-04", + "loss": 0.7556, + "slid_loss": 0.6908, + "step": 3227, + "time": 13.48 + }, + { + "epoch": 3.1, + "learning_rate": "1.3788e-04", + "loss": 0.6963, + "slid_loss": 0.6897, + "step": 3228, + "time": 11.68 + }, + { + "epoch": 3.1, + "learning_rate": "1.3785e-04", + "loss": 0.6852, + "slid_loss": 0.69, + "step": 3229, + "time": 12.81 + }, + { + "epoch": 3.1, + "learning_rate": "1.3782e-04", + "loss": 0.7193, + "slid_loss": 0.6895, + "step": 3230, + "time": 14.07 + }, + { + "epoch": 3.1, + "learning_rate": "1.3778e-04", + "loss": 0.6781, + "slid_loss": 0.6893, + "step": 3231, + "time": 11.34 + }, + { + "epoch": 3.1, + "learning_rate": "1.3775e-04", + "loss": 0.6312, + "slid_loss": 0.6881, + "step": 3232, + "time": 11.43 + }, + { + "epoch": 3.11, + "learning_rate": "1.3772e-04", + "loss": 0.6932, + "slid_loss": 0.6877, + "step": 3233, + "time": 13.98 + }, + { + "epoch": 3.11, + "learning_rate": "1.3769e-04", + "loss": 0.7154, + "slid_loss": 0.6874, + "step": 3234, + "time": 13.29 + }, + { + "epoch": 3.11, + "learning_rate": "1.3765e-04", + "loss": 0.6217, + "slid_loss": 0.6877, + "step": 3235, + "time": 12.12 + }, + { + "epoch": 3.11, + "learning_rate": "1.3762e-04", + "loss": 0.5643, + "slid_loss": 0.6862, + "step": 3236, + "time": 12.86 + }, + { + "epoch": 3.11, + "learning_rate": "1.3759e-04", + "loss": 0.6934, + "slid_loss": 0.6874, + "step": 3237, + "time": 13.13 + }, + { + "epoch": 3.11, + "learning_rate": "1.3756e-04", + "loss": 0.7757, + "slid_loss": 0.6885, + "step": 3238, + "time": 12.23 + }, + { + "epoch": 3.11, + 
"learning_rate": "1.3752e-04", + "loss": 0.8026, + "slid_loss": 0.6894, + "step": 3239, + "time": 13.43 + }, + { + "epoch": 3.11, + "learning_rate": "1.3749e-04", + "loss": 0.7113, + "slid_loss": 0.6899, + "step": 3240, + "time": 13.52 + }, + { + "epoch": 3.11, + "learning_rate": "1.3746e-04", + "loss": 0.7167, + "slid_loss": 0.6898, + "step": 3241, + "time": 12.77 + }, + { + "epoch": 3.11, + "learning_rate": "1.3743e-04", + "loss": 0.6399, + "slid_loss": 0.6891, + "step": 3242, + "time": 13.68 + }, + { + "epoch": 3.12, + "learning_rate": "1.3739e-04", + "loss": 0.5965, + "slid_loss": 0.6881, + "step": 3243, + "time": 13.72 + }, + { + "epoch": 3.12, + "learning_rate": "1.3736e-04", + "loss": 0.742, + "slid_loss": 0.6894, + "step": 3244, + "time": 12.9 + }, + { + "epoch": 3.12, + "learning_rate": "1.3733e-04", + "loss": 0.6554, + "slid_loss": 0.6894, + "step": 3245, + "time": 12.81 + }, + { + "epoch": 3.12, + "learning_rate": "1.3730e-04", + "loss": 0.6197, + "slid_loss": 0.6879, + "step": 3246, + "time": 13.18 + }, + { + "epoch": 3.12, + "learning_rate": "1.3726e-04", + "loss": 0.7444, + "slid_loss": 0.688, + "step": 3247, + "time": 11.91 + }, + { + "epoch": 3.12, + "learning_rate": "1.3723e-04", + "loss": 0.6823, + "slid_loss": 0.6875, + "step": 3248, + "time": 11.25 + }, + { + "epoch": 3.12, + "learning_rate": "1.3720e-04", + "loss": 0.7295, + "slid_loss": 0.6864, + "step": 3249, + "time": 12.23 + }, + { + "epoch": 3.12, + "learning_rate": "1.3717e-04", + "loss": 0.698, + "slid_loss": 0.687, + "step": 3250, + "time": 12.9 + }, + { + "epoch": 3.12, + "learning_rate": "1.3713e-04", + "loss": 0.6449, + "slid_loss": 0.6854, + "step": 3251, + "time": 13.45 + }, + { + "epoch": 3.12, + "learning_rate": "1.3710e-04", + "loss": 0.7466, + "slid_loss": 0.6856, + "step": 3252, + "time": 13.93 + }, + { + "epoch": 3.12, + "learning_rate": "1.3707e-04", + "loss": 0.6312, + "slid_loss": 0.6864, + "step": 3253, + "time": 13.57 + }, + { + "epoch": 3.13, + "learning_rate": 
"1.3704e-04", + "loss": 0.6325, + "slid_loss": 0.6862, + "step": 3254, + "time": 13.74 + }, + { + "epoch": 3.13, + "learning_rate": "1.3701e-04", + "loss": 0.69, + "slid_loss": 0.6869, + "step": 3255, + "time": 12.23 + }, + { + "epoch": 3.13, + "learning_rate": "1.3697e-04", + "loss": 0.6232, + "slid_loss": 0.6862, + "step": 3256, + "time": 14.03 + }, + { + "epoch": 3.13, + "learning_rate": "1.3694e-04", + "loss": 0.5775, + "slid_loss": 0.6849, + "step": 3257, + "time": 13.01 + }, + { + "epoch": 3.13, + "learning_rate": "1.3691e-04", + "loss": 0.6228, + "slid_loss": 0.6844, + "step": 3258, + "time": 11.4 + }, + { + "epoch": 3.13, + "learning_rate": "1.3688e-04", + "loss": 0.6261, + "slid_loss": 0.6835, + "step": 3259, + "time": 11.31 + }, + { + "epoch": 3.13, + "learning_rate": "1.3684e-04", + "loss": 0.7373, + "slid_loss": 0.6836, + "step": 3260, + "time": 11.39 + }, + { + "epoch": 3.13, + "learning_rate": "1.3681e-04", + "loss": 0.5971, + "slid_loss": 0.6835, + "step": 3261, + "time": 12.83 + }, + { + "epoch": 3.13, + "learning_rate": "1.3678e-04", + "loss": 0.7308, + "slid_loss": 0.6842, + "step": 3262, + "time": 12.52 + }, + { + "epoch": 3.13, + "learning_rate": "1.3675e-04", + "loss": 0.6147, + "slid_loss": 0.6829, + "step": 3263, + "time": 13.4 + }, + { + "epoch": 3.14, + "learning_rate": "1.3671e-04", + "loss": 0.7135, + "slid_loss": 0.682, + "step": 3264, + "time": 14.24 + }, + { + "epoch": 3.14, + "learning_rate": "1.3668e-04", + "loss": 0.6215, + "slid_loss": 0.6809, + "step": 3265, + "time": 10.95 + }, + { + "epoch": 3.14, + "learning_rate": "1.3665e-04", + "loss": 0.6423, + "slid_loss": 0.6813, + "step": 3266, + "time": 13.0 + }, + { + "epoch": 3.14, + "learning_rate": "1.3662e-04", + "loss": 0.8409, + "slid_loss": 0.6836, + "step": 3267, + "time": 13.33 + }, + { + "epoch": 3.14, + "learning_rate": "1.3658e-04", + "loss": 0.6828, + "slid_loss": 0.6841, + "step": 3268, + "time": 12.91 + }, + { + "epoch": 3.14, + "learning_rate": "1.3655e-04", + "loss": 
0.625, + "slid_loss": 0.682, + "step": 3269, + "time": 12.3 + }, + { + "epoch": 3.14, + "learning_rate": "1.3652e-04", + "loss": 0.6589, + "slid_loss": 0.6817, + "step": 3270, + "time": 11.44 + }, + { + "epoch": 3.14, + "learning_rate": "1.3649e-04", + "loss": 0.6201, + "slid_loss": 0.6807, + "step": 3271, + "time": 13.0 + }, + { + "epoch": 3.14, + "learning_rate": "1.3646e-04", + "loss": 0.8047, + "slid_loss": 0.6818, + "step": 3272, + "time": 14.46 + }, + { + "epoch": 3.14, + "learning_rate": "1.3642e-04", + "loss": 0.7142, + "slid_loss": 0.6819, + "step": 3273, + "time": 11.07 + }, + { + "epoch": 3.15, + "learning_rate": "1.3639e-04", + "loss": 0.7352, + "slid_loss": 0.6827, + "step": 3274, + "time": 11.95 + }, + { + "epoch": 3.15, + "learning_rate": "1.3636e-04", + "loss": 0.7001, + "slid_loss": 0.6823, + "step": 3275, + "time": 12.48 + }, + { + "epoch": 3.15, + "learning_rate": "1.3633e-04", + "loss": 0.634, + "slid_loss": 0.6825, + "step": 3276, + "time": 13.24 + }, + { + "epoch": 3.15, + "learning_rate": "1.3629e-04", + "loss": 0.6638, + "slid_loss": 0.6819, + "step": 3277, + "time": 14.62 + }, + { + "epoch": 3.15, + "learning_rate": "1.3626e-04", + "loss": 0.6915, + "slid_loss": 0.6821, + "step": 3278, + "time": 13.28 + }, + { + "epoch": 3.15, + "learning_rate": "1.3623e-04", + "loss": 0.7559, + "slid_loss": 0.6827, + "step": 3279, + "time": 13.38 + }, + { + "epoch": 3.15, + "learning_rate": "1.3620e-04", + "loss": 0.6621, + "slid_loss": 0.6827, + "step": 3280, + "time": 13.38 + }, + { + "epoch": 3.15, + "learning_rate": "1.3617e-04", + "loss": 0.6505, + "slid_loss": 0.6822, + "step": 3281, + "time": 13.9 + }, + { + "epoch": 3.15, + "learning_rate": "1.3613e-04", + "loss": 0.5872, + "slid_loss": 0.6819, + "step": 3282, + "time": 13.83 + }, + { + "epoch": 3.15, + "learning_rate": "1.3610e-04", + "loss": 0.6235, + "slid_loss": 0.6812, + "step": 3283, + "time": 12.37 + }, + { + "epoch": 3.15, + "learning_rate": "1.3607e-04", + "loss": 0.6815, + "slid_loss": 
0.6817, + "step": 3284, + "time": 13.39 + }, + { + "epoch": 3.16, + "learning_rate": "1.3604e-04", + "loss": 0.5809, + "slid_loss": 0.68, + "step": 3285, + "time": 13.64 + }, + { + "epoch": 3.16, + "learning_rate": "1.3600e-04", + "loss": 0.6227, + "slid_loss": 0.678, + "step": 3286, + "time": 12.72 + }, + { + "epoch": 3.16, + "learning_rate": "1.3597e-04", + "loss": 0.5773, + "slid_loss": 0.6776, + "step": 3287, + "time": 12.99 + }, + { + "epoch": 3.16, + "learning_rate": "1.3594e-04", + "loss": 0.627, + "slid_loss": 0.6765, + "step": 3288, + "time": 11.81 + }, + { + "epoch": 3.16, + "learning_rate": "1.3591e-04", + "loss": 0.6733, + "slid_loss": 0.6764, + "step": 3289, + "time": 12.92 + }, + { + "epoch": 3.16, + "learning_rate": "1.3588e-04", + "loss": 0.6861, + "slid_loss": 0.677, + "step": 3290, + "time": 11.13 + }, + { + "epoch": 3.16, + "learning_rate": "1.3584e-04", + "loss": 0.5781, + "slid_loss": 0.6755, + "step": 3291, + "time": 12.79 + }, + { + "epoch": 3.16, + "learning_rate": "1.3581e-04", + "loss": 0.5836, + "slid_loss": 0.675, + "step": 3292, + "time": 13.56 + }, + { + "epoch": 3.16, + "learning_rate": "1.3578e-04", + "loss": 0.6979, + "slid_loss": 0.6752, + "step": 3293, + "time": 13.28 + }, + { + "epoch": 3.16, + "learning_rate": "1.3575e-04", + "loss": 0.6836, + "slid_loss": 0.6756, + "step": 3294, + "time": 13.72 + }, + { + "epoch": 3.17, + "learning_rate": "1.3571e-04", + "loss": 0.7617, + "slid_loss": 0.6752, + "step": 3295, + "time": 12.9 + }, + { + "epoch": 3.17, + "learning_rate": "1.3568e-04", + "loss": 0.7569, + "slid_loss": 0.6758, + "step": 3296, + "time": 13.16 + }, + { + "epoch": 3.17, + "learning_rate": "1.3565e-04", + "loss": 0.6258, + "slid_loss": 0.6755, + "step": 3297, + "time": 13.68 + }, + { + "epoch": 3.17, + "learning_rate": "1.3562e-04", + "loss": 0.6383, + "slid_loss": 0.6753, + "step": 3298, + "time": 12.97 + }, + { + "epoch": 3.17, + "learning_rate": "1.3559e-04", + "loss": 0.6902, + "slid_loss": 0.6759, + "step": 3299, + 
"time": 11.53 + }, + { + "epoch": 3.17, + "learning_rate": "1.3555e-04", + "loss": 0.6196, + "slid_loss": 0.6748, + "step": 3300, + "time": 13.25 + }, + { + "epoch": 3.17, + "learning_rate": "1.3552e-04", + "loss": 0.7022, + "slid_loss": 0.6744, + "step": 3301, + "time": 13.11 + }, + { + "epoch": 3.17, + "learning_rate": "1.3549e-04", + "loss": 0.5931, + "slid_loss": 0.6739, + "step": 3302, + "time": 13.11 + }, + { + "epoch": 3.17, + "learning_rate": "1.3546e-04", + "loss": 0.659, + "slid_loss": 0.6729, + "step": 3303, + "time": 13.42 + }, + { + "epoch": 3.17, + "learning_rate": "1.3543e-04", + "loss": 0.6057, + "slid_loss": 0.6727, + "step": 3304, + "time": 12.57 + }, + { + "epoch": 3.17, + "learning_rate": "1.3539e-04", + "loss": 0.6678, + "slid_loss": 0.6722, + "step": 3305, + "time": 13.28 + }, + { + "epoch": 3.18, + "learning_rate": "1.3536e-04", + "loss": 0.6154, + "slid_loss": 0.672, + "step": 3306, + "time": 13.89 + }, + { + "epoch": 3.18, + "learning_rate": "1.3533e-04", + "loss": 0.5986, + "slid_loss": 0.6707, + "step": 3307, + "time": 12.89 + }, + { + "epoch": 3.18, + "learning_rate": "1.3530e-04", + "loss": 0.5676, + "slid_loss": 0.6699, + "step": 3308, + "time": 13.75 + }, + { + "epoch": 3.18, + "learning_rate": "1.3527e-04", + "loss": 0.7246, + "slid_loss": 0.6714, + "step": 3309, + "time": 13.07 + }, + { + "epoch": 3.18, + "learning_rate": "1.3523e-04", + "loss": 0.5979, + "slid_loss": 0.6704, + "step": 3310, + "time": 11.04 + }, + { + "epoch": 3.18, + "learning_rate": "1.3520e-04", + "loss": 0.6939, + "slid_loss": 0.6714, + "step": 3311, + "time": 12.81 + }, + { + "epoch": 3.18, + "learning_rate": "1.3517e-04", + "loss": 0.6469, + "slid_loss": 0.6707, + "step": 3312, + "time": 12.94 + }, + { + "epoch": 3.18, + "learning_rate": "1.3514e-04", + "loss": 0.5894, + "slid_loss": 0.6695, + "step": 3313, + "time": 13.91 + }, + { + "epoch": 3.18, + "learning_rate": "1.3511e-04", + "loss": 0.7303, + "slid_loss": 0.6702, + "step": 3314, + "time": 12.89 + }, + 
{ + "epoch": 3.18, + "learning_rate": "1.3507e-04", + "loss": 0.6274, + "slid_loss": 0.6684, + "step": 3315, + "time": 12.23 + }, + { + "epoch": 3.19, + "learning_rate": "1.3504e-04", + "loss": 0.7723, + "slid_loss": 0.669, + "step": 3316, + "time": 10.93 + }, + { + "epoch": 3.19, + "learning_rate": "1.3501e-04", + "loss": 0.6318, + "slid_loss": 0.6687, + "step": 3317, + "time": 13.35 + }, + { + "epoch": 3.19, + "learning_rate": "1.3498e-04", + "loss": 0.6214, + "slid_loss": 0.6681, + "step": 3318, + "time": 13.14 + }, + { + "epoch": 3.19, + "learning_rate": "1.3495e-04", + "loss": 0.5654, + "slid_loss": 0.6655, + "step": 3319, + "time": 13.76 + }, + { + "epoch": 3.19, + "learning_rate": "1.3491e-04", + "loss": 0.6817, + "slid_loss": 0.6656, + "step": 3320, + "time": 12.19 + }, + { + "epoch": 3.19, + "learning_rate": "1.3488e-04", + "loss": 0.5939, + "slid_loss": 0.6645, + "step": 3321, + "time": 11.69 + }, + { + "epoch": 3.19, + "learning_rate": "1.3485e-04", + "loss": 0.6668, + "slid_loss": 0.6647, + "step": 3322, + "time": 12.93 + }, + { + "epoch": 3.19, + "learning_rate": "1.3482e-04", + "loss": 0.6362, + "slid_loss": 0.6649, + "step": 3323, + "time": 12.59 + }, + { + "epoch": 3.19, + "learning_rate": "1.3479e-04", + "loss": 0.5771, + "slid_loss": 0.6636, + "step": 3324, + "time": 13.9 + }, + { + "epoch": 3.19, + "learning_rate": "1.3475e-04", + "loss": 0.6563, + "slid_loss": 0.6642, + "step": 3325, + "time": 13.69 + }, + { + "epoch": 3.2, + "learning_rate": "1.3472e-04", + "loss": 0.6076, + "slid_loss": 0.6639, + "step": 3326, + "time": 12.42 + }, + { + "epoch": 3.2, + "learning_rate": "1.3469e-04", + "loss": 0.6605, + "slid_loss": 0.6629, + "step": 3327, + "time": 13.22 + }, + { + "epoch": 3.2, + "learning_rate": "1.3466e-04", + "loss": 0.5887, + "slid_loss": 0.6618, + "step": 3328, + "time": 11.43 + }, + { + "epoch": 3.2, + "learning_rate": "1.3463e-04", + "loss": 0.6595, + "slid_loss": 0.6616, + "step": 3329, + "time": 12.97 + }, + { + "epoch": 3.2, + 
"learning_rate": "1.3459e-04", + "loss": 0.5991, + "slid_loss": 0.6604, + "step": 3330, + "time": 13.11 + }, + { + "epoch": 3.2, + "learning_rate": "1.3456e-04", + "loss": 0.6761, + "slid_loss": 0.6603, + "step": 3331, + "time": 12.05 + }, + { + "epoch": 3.2, + "learning_rate": "1.3453e-04", + "loss": 0.633, + "slid_loss": 0.6604, + "step": 3332, + "time": 13.38 + }, + { + "epoch": 3.2, + "learning_rate": "1.3450e-04", + "loss": 0.7339, + "slid_loss": 0.6608, + "step": 3333, + "time": 11.52 + }, + { + "epoch": 3.2, + "learning_rate": "1.3447e-04", + "loss": 0.715, + "slid_loss": 0.6608, + "step": 3334, + "time": 13.79 + }, + { + "epoch": 3.2, + "learning_rate": "1.3443e-04", + "loss": 0.7094, + "slid_loss": 0.6616, + "step": 3335, + "time": 13.31 + }, + { + "epoch": 3.2, + "learning_rate": "1.3440e-04", + "loss": 0.7156, + "slid_loss": 0.6632, + "step": 3336, + "time": 11.47 + }, + { + "epoch": 3.21, + "learning_rate": "1.3437e-04", + "loss": 0.6799, + "slid_loss": 0.663, + "step": 3337, + "time": 12.6 + }, + { + "epoch": 3.21, + "learning_rate": "1.3434e-04", + "loss": 0.6772, + "slid_loss": 0.662, + "step": 3338, + "time": 13.31 + }, + { + "epoch": 3.21, + "learning_rate": "1.3431e-04", + "loss": 0.6144, + "slid_loss": 0.6602, + "step": 3339, + "time": 13.33 + }, + { + "epoch": 3.21, + "learning_rate": "1.3428e-04", + "loss": 0.6166, + "slid_loss": 0.6592, + "step": 3340, + "time": 12.87 + }, + { + "epoch": 3.21, + "learning_rate": "1.3424e-04", + "loss": 0.6997, + "slid_loss": 0.659, + "step": 3341, + "time": 12.91 + }, + { + "epoch": 3.21, + "learning_rate": "1.3421e-04", + "loss": 0.72, + "slid_loss": 0.6598, + "step": 3342, + "time": 13.49 + }, + { + "epoch": 3.21, + "learning_rate": "1.3418e-04", + "loss": 0.638, + "slid_loss": 0.6603, + "step": 3343, + "time": 13.02 + }, + { + "epoch": 3.21, + "learning_rate": "1.3415e-04", + "loss": 0.6522, + "slid_loss": 0.6594, + "step": 3344, + "time": 13.8 + }, + { + "epoch": 3.21, + "learning_rate": "1.3412e-04", + 
"loss": 0.6518, + "slid_loss": 0.6593, + "step": 3345, + "time": 12.09 + }, + { + "epoch": 3.21, + "learning_rate": "1.3408e-04", + "loss": 0.6996, + "slid_loss": 0.6601, + "step": 3346, + "time": 12.51 + }, + { + "epoch": 3.22, + "learning_rate": "1.3405e-04", + "loss": 0.5865, + "slid_loss": 0.6585, + "step": 3347, + "time": 11.63 + }, + { + "epoch": 3.22, + "learning_rate": "1.3402e-04", + "loss": 0.6442, + "slid_loss": 0.6582, + "step": 3348, + "time": 11.51 + }, + { + "epoch": 3.22, + "learning_rate": "1.3399e-04", + "loss": 0.6179, + "slid_loss": 0.657, + "step": 3349, + "time": 13.42 + }, + { + "epoch": 3.22, + "learning_rate": "1.3396e-04", + "loss": 0.6907, + "slid_loss": 0.657, + "step": 3350, + "time": 13.49 + }, + { + "epoch": 3.22, + "learning_rate": "1.3393e-04", + "loss": 0.6699, + "slid_loss": 0.6572, + "step": 3351, + "time": 11.63 + }, + { + "epoch": 3.22, + "learning_rate": "1.3389e-04", + "loss": 0.6988, + "slid_loss": 0.6567, + "step": 3352, + "time": 12.43 + }, + { + "epoch": 3.22, + "learning_rate": "1.3386e-04", + "loss": 0.6495, + "slid_loss": 0.6569, + "step": 3353, + "time": 12.93 + }, + { + "epoch": 3.22, + "learning_rate": "1.3383e-04", + "loss": 0.7588, + "slid_loss": 0.6582, + "step": 3354, + "time": 13.75 + }, + { + "epoch": 3.22, + "learning_rate": "1.3380e-04", + "loss": 0.7779, + "slid_loss": 0.6591, + "step": 3355, + "time": 14.09 + }, + { + "epoch": 3.22, + "learning_rate": "1.3377e-04", + "loss": 0.6539, + "slid_loss": 0.6594, + "step": 3356, + "time": 13.2 + }, + { + "epoch": 3.22, + "learning_rate": "1.3373e-04", + "loss": 0.7241, + "slid_loss": 0.6608, + "step": 3357, + "time": 14.49 + }, + { + "epoch": 3.23, + "learning_rate": "1.3370e-04", + "loss": 0.5662, + "slid_loss": 0.6603, + "step": 3358, + "time": 11.51 + }, + { + "epoch": 3.23, + "learning_rate": "1.3367e-04", + "loss": 0.7151, + "slid_loss": 0.6612, + "step": 3359, + "time": 13.44 + }, + { + "epoch": 3.23, + "learning_rate": "1.3364e-04", + "loss": 0.7163, + 
"slid_loss": 0.661, + "step": 3360, + "time": 13.19 + }, + { + "epoch": 3.23, + "learning_rate": "1.3361e-04", + "loss": 0.664, + "slid_loss": 0.6616, + "step": 3361, + "time": 13.9 + }, + { + "epoch": 3.23, + "learning_rate": "1.3358e-04", + "loss": 0.5622, + "slid_loss": 0.6599, + "step": 3362, + "time": 11.92 + }, + { + "epoch": 3.23, + "learning_rate": "1.3354e-04", + "loss": 0.6787, + "slid_loss": 0.6606, + "step": 3363, + "time": 13.51 + }, + { + "epoch": 3.23, + "learning_rate": "1.3351e-04", + "loss": 0.6547, + "slid_loss": 0.66, + "step": 3364, + "time": 11.73 + }, + { + "epoch": 3.23, + "learning_rate": "1.3348e-04", + "loss": 0.6794, + "slid_loss": 0.6606, + "step": 3365, + "time": 13.52 + }, + { + "epoch": 3.23, + "learning_rate": "1.3345e-04", + "loss": 0.6005, + "slid_loss": 0.6602, + "step": 3366, + "time": 14.05 + }, + { + "epoch": 3.23, + "learning_rate": "1.3342e-04", + "loss": 0.6205, + "slid_loss": 0.6579, + "step": 3367, + "time": 14.03 + }, + { + "epoch": 3.24, + "learning_rate": "1.3339e-04", + "loss": 0.6591, + "slid_loss": 0.6577, + "step": 3368, + "time": 13.94 + }, + { + "epoch": 3.24, + "learning_rate": "1.3335e-04", + "loss": 0.6542, + "slid_loss": 0.658, + "step": 3369, + "time": 12.9 + }, + { + "epoch": 3.24, + "learning_rate": "1.3332e-04", + "loss": 0.6032, + "slid_loss": 0.6574, + "step": 3370, + "time": 13.43 + }, + { + "epoch": 3.24, + "learning_rate": "1.3329e-04", + "loss": 0.711, + "slid_loss": 0.6584, + "step": 3371, + "time": 13.57 + }, + { + "epoch": 3.24, + "learning_rate": "1.3326e-04", + "loss": 0.7046, + "slid_loss": 0.6574, + "step": 3372, + "time": 13.31 + }, + { + "epoch": 3.24, + "learning_rate": "1.3323e-04", + "loss": 0.6165, + "slid_loss": 0.6564, + "step": 3373, + "time": 12.79 + }, + { + "epoch": 3.24, + "learning_rate": "1.3320e-04", + "loss": 0.6844, + "slid_loss": 0.6559, + "step": 3374, + "time": 13.16 + }, + { + "epoch": 3.24, + "learning_rate": "1.3317e-04", + "loss": 0.6786, + "slid_loss": 0.6557, + 
"step": 3375, + "time": 12.91 + }, + { + "epoch": 3.24, + "learning_rate": "1.3313e-04", + "loss": 0.6452, + "slid_loss": 0.6558, + "step": 3376, + "time": 13.3 + }, + { + "epoch": 3.24, + "learning_rate": "1.3310e-04", + "loss": 0.7043, + "slid_loss": 0.6562, + "step": 3377, + "time": 12.9 + }, + { + "epoch": 3.24, + "learning_rate": "1.3307e-04", + "loss": 0.6314, + "slid_loss": 0.6556, + "step": 3378, + "time": 13.06 + }, + { + "epoch": 3.25, + "learning_rate": "1.3304e-04", + "loss": 0.6355, + "slid_loss": 0.6544, + "step": 3379, + "time": 13.28 + }, + { + "epoch": 3.25, + "learning_rate": "1.3301e-04", + "loss": 0.6619, + "slid_loss": 0.6544, + "step": 3380, + "time": 14.28 + }, + { + "epoch": 3.25, + "learning_rate": "1.3298e-04", + "loss": 0.5561, + "slid_loss": 0.6534, + "step": 3381, + "time": 11.04 + }, + { + "epoch": 3.25, + "learning_rate": "1.3294e-04", + "loss": 0.6913, + "slid_loss": 0.6545, + "step": 3382, + "time": 12.01 + }, + { + "epoch": 3.25, + "learning_rate": "1.3291e-04", + "loss": 0.6596, + "slid_loss": 0.6548, + "step": 3383, + "time": 12.38 + }, + { + "epoch": 3.25, + "learning_rate": "1.3288e-04", + "loss": 0.5446, + "slid_loss": 0.6534, + "step": 3384, + "time": 13.36 + }, + { + "epoch": 3.25, + "learning_rate": "1.3285e-04", + "loss": 0.6033, + "slid_loss": 0.6537, + "step": 3385, + "time": 12.03 + }, + { + "epoch": 3.25, + "learning_rate": "1.3282e-04", + "loss": 0.7078, + "slid_loss": 0.6545, + "step": 3386, + "time": 12.92 + }, + { + "epoch": 3.25, + "learning_rate": "1.3279e-04", + "loss": 0.6779, + "slid_loss": 0.6555, + "step": 3387, + "time": 11.91 + }, + { + "epoch": 3.25, + "learning_rate": "1.3276e-04", + "loss": 0.6534, + "slid_loss": 0.6558, + "step": 3388, + "time": 12.87 + }, + { + "epoch": 3.26, + "learning_rate": "1.3272e-04", + "loss": 0.6594, + "slid_loss": 0.6557, + "step": 3389, + "time": 13.4 + }, + { + "epoch": 3.26, + "learning_rate": "1.3269e-04", + "loss": 0.572, + "slid_loss": 0.6545, + "step": 3390, + "time": 
13.63 + }, + { + "epoch": 3.26, + "learning_rate": "1.3266e-04", + "loss": 0.7381, + "slid_loss": 0.6561, + "step": 3391, + "time": 13.45 + }, + { + "epoch": 3.26, + "learning_rate": "1.3263e-04", + "loss": 0.6987, + "slid_loss": 0.6573, + "step": 3392, + "time": 13.53 + }, + { + "epoch": 3.26, + "learning_rate": "1.3260e-04", + "loss": 0.6492, + "slid_loss": 0.6568, + "step": 3393, + "time": 10.99 + }, + { + "epoch": 3.26, + "learning_rate": "1.3257e-04", + "loss": 0.6451, + "slid_loss": 0.6564, + "step": 3394, + "time": 13.29 + }, + { + "epoch": 3.26, + "learning_rate": "1.3254e-04", + "loss": 0.6647, + "slid_loss": 0.6554, + "step": 3395, + "time": 11.94 + }, + { + "epoch": 3.26, + "learning_rate": "1.3250e-04", + "loss": 0.7301, + "slid_loss": 0.6552, + "step": 3396, + "time": 13.44 + }, + { + "epoch": 3.26, + "learning_rate": "1.3247e-04", + "loss": 0.6051, + "slid_loss": 0.6549, + "step": 3397, + "time": 13.98 + }, + { + "epoch": 3.26, + "learning_rate": "1.3244e-04", + "loss": 0.6691, + "slid_loss": 0.6553, + "step": 3398, + "time": 13.55 + }, + { + "epoch": 3.27, + "learning_rate": "1.3241e-04", + "loss": 0.5666, + "slid_loss": 0.654, + "step": 3399, + "time": 13.91 + }, + { + "epoch": 3.27, + "learning_rate": "1.3238e-04", + "loss": 0.5507, + "slid_loss": 0.6533, + "step": 3400, + "time": 12.99 + }, + { + "epoch": 3.27, + "learning_rate": "1.3235e-04", + "loss": 0.6497, + "slid_loss": 0.6528, + "step": 3401, + "time": 13.54 + }, + { + "epoch": 3.27, + "learning_rate": "1.3232e-04", + "loss": 0.6674, + "slid_loss": 0.6535, + "step": 3402, + "time": 12.63 + }, + { + "epoch": 3.27, + "learning_rate": "1.3228e-04", + "loss": 0.6314, + "slid_loss": 0.6533, + "step": 3403, + "time": 13.59 + }, + { + "epoch": 3.27, + "learning_rate": "1.3225e-04", + "loss": 0.7013, + "slid_loss": 0.6542, + "step": 3404, + "time": 14.79 + }, + { + "epoch": 3.27, + "learning_rate": "1.3222e-04", + "loss": 0.5831, + "slid_loss": 0.6534, + "step": 3405, + "time": 13.81 + }, + { + 
"epoch": 3.27, + "learning_rate": "1.3219e-04", + "loss": 0.6826, + "slid_loss": 0.6541, + "step": 3406, + "time": 13.74 + }, + { + "epoch": 3.27, + "learning_rate": "1.3216e-04", + "loss": 0.6146, + "slid_loss": 0.6542, + "step": 3407, + "time": 11.79 + }, + { + "epoch": 3.27, + "learning_rate": "1.3213e-04", + "loss": 0.6602, + "slid_loss": 0.6551, + "step": 3408, + "time": 13.22 + }, + { + "epoch": 3.27, + "learning_rate": "1.3210e-04", + "loss": 0.6843, + "slid_loss": 0.6547, + "step": 3409, + "time": 12.85 + }, + { + "epoch": 3.28, + "learning_rate": "1.3206e-04", + "loss": 0.6797, + "slid_loss": 0.6556, + "step": 3410, + "time": 11.3 + }, + { + "epoch": 3.28, + "learning_rate": "1.3203e-04", + "loss": 0.5883, + "slid_loss": 0.6545, + "step": 3411, + "time": 12.76 + }, + { + "epoch": 3.28, + "learning_rate": "1.3200e-04", + "loss": 0.6742, + "slid_loss": 0.6548, + "step": 3412, + "time": 12.21 + }, + { + "epoch": 3.28, + "learning_rate": "1.3197e-04", + "loss": 0.6273, + "slid_loss": 0.6551, + "step": 3413, + "time": 13.54 + }, + { + "epoch": 3.28, + "learning_rate": "1.3194e-04", + "loss": 0.6446, + "slid_loss": 0.6543, + "step": 3414, + "time": 13.9 + }, + { + "epoch": 3.28, + "learning_rate": "1.3191e-04", + "loss": 0.6479, + "slid_loss": 0.6545, + "step": 3415, + "time": 11.4 + }, + { + "epoch": 3.28, + "learning_rate": "1.3188e-04", + "loss": 0.5691, + "slid_loss": 0.6525, + "step": 3416, + "time": 13.04 + }, + { + "epoch": 3.28, + "learning_rate": "1.3185e-04", + "loss": 0.6312, + "slid_loss": 0.6525, + "step": 3417, + "time": 12.88 + }, + { + "epoch": 3.28, + "learning_rate": "1.3181e-04", + "loss": 0.7112, + "slid_loss": 0.6534, + "step": 3418, + "time": 12.93 + }, + { + "epoch": 3.28, + "learning_rate": "1.3178e-04", + "loss": 0.6582, + "slid_loss": 0.6543, + "step": 3419, + "time": 12.97 + }, + { + "epoch": 3.29, + "learning_rate": "1.3175e-04", + "loss": 0.6069, + "slid_loss": 0.6535, + "step": 3420, + "time": 11.75 + }, + { + "epoch": 3.29, + 
"learning_rate": "1.3172e-04", + "loss": 0.6481, + "slid_loss": 0.6541, + "step": 3421, + "time": 12.04 + }, + { + "epoch": 3.29, + "learning_rate": "1.3169e-04", + "loss": 0.675, + "slid_loss": 0.6542, + "step": 3422, + "time": 12.91 + }, + { + "epoch": 3.29, + "learning_rate": "1.3166e-04", + "loss": 0.6892, + "slid_loss": 0.6547, + "step": 3423, + "time": 13.01 + }, + { + "epoch": 3.29, + "learning_rate": "1.3163e-04", + "loss": 0.6977, + "slid_loss": 0.6559, + "step": 3424, + "time": 11.8 + }, + { + "epoch": 3.29, + "learning_rate": "1.3160e-04", + "loss": 0.6159, + "slid_loss": 0.6555, + "step": 3425, + "time": 14.28 + }, + { + "epoch": 3.29, + "learning_rate": "1.3157e-04", + "loss": 0.5919, + "slid_loss": 0.6553, + "step": 3426, + "time": 11.19 + }, + { + "epoch": 3.29, + "learning_rate": "1.3153e-04", + "loss": 0.5903, + "slid_loss": 0.6546, + "step": 3427, + "time": 12.0 + }, + { + "epoch": 3.29, + "learning_rate": "1.3150e-04", + "loss": 0.6558, + "slid_loss": 0.6553, + "step": 3428, + "time": 13.7 + }, + { + "epoch": 3.29, + "learning_rate": "1.3147e-04", + "loss": 0.5916, + "slid_loss": 0.6546, + "step": 3429, + "time": 13.15 + }, + { + "epoch": 3.29, + "learning_rate": "1.3144e-04", + "loss": 0.6759, + "slid_loss": 0.6554, + "step": 3430, + "time": 11.11 + }, + { + "epoch": 3.3, + "learning_rate": "1.3141e-04", + "loss": 0.677, + "slid_loss": 0.6554, + "step": 3431, + "time": 13.03 + }, + { + "epoch": 3.3, + "learning_rate": "1.3138e-04", + "loss": 0.6758, + "slid_loss": 0.6558, + "step": 3432, + "time": 12.06 + }, + { + "epoch": 3.3, + "learning_rate": "1.3135e-04", + "loss": 0.6931, + "slid_loss": 0.6554, + "step": 3433, + "time": 12.81 + }, + { + "epoch": 3.3, + "learning_rate": "1.3132e-04", + "loss": 0.6447, + "slid_loss": 0.6547, + "step": 3434, + "time": 13.28 + }, + { + "epoch": 3.3, + "learning_rate": "1.3128e-04", + "loss": 0.7286, + "slid_loss": 0.6549, + "step": 3435, + "time": 13.47 + }, + { + "epoch": 3.3, + "learning_rate": "1.3125e-04", 
+ "loss": 0.5822, + "slid_loss": 0.6536, + "step": 3436, + "time": 13.49 + }, + { + "epoch": 3.3, + "learning_rate": "1.3122e-04", + "loss": 0.6934, + "slid_loss": 0.6537, + "step": 3437, + "time": 13.81 + }, + { + "epoch": 3.3, + "learning_rate": "1.3119e-04", + "loss": 0.7107, + "slid_loss": 0.654, + "step": 3438, + "time": 11.58 + }, + { + "epoch": 3.3, + "learning_rate": "1.3116e-04", + "loss": 0.5944, + "slid_loss": 0.6538, + "step": 3439, + "time": 11.91 + }, + { + "epoch": 3.3, + "learning_rate": "1.3113e-04", + "loss": 0.6438, + "slid_loss": 0.6541, + "step": 3440, + "time": 13.4 + }, + { + "epoch": 3.31, + "learning_rate": "1.3110e-04", + "loss": 0.6453, + "slid_loss": 0.6536, + "step": 3441, + "time": 13.68 + }, + { + "epoch": 3.31, + "learning_rate": "1.3107e-04", + "loss": 0.6844, + "slid_loss": 0.6532, + "step": 3442, + "time": 13.64 + }, + { + "epoch": 3.31, + "learning_rate": "1.3104e-04", + "loss": 0.6456, + "slid_loss": 0.6533, + "step": 3443, + "time": 13.66 + }, + { + "epoch": 3.31, + "learning_rate": "1.3101e-04", + "loss": 0.6169, + "slid_loss": 0.6529, + "step": 3444, + "time": 13.28 + }, + { + "epoch": 3.31, + "learning_rate": "1.3097e-04", + "loss": 0.6461, + "slid_loss": 0.6529, + "step": 3445, + "time": 13.82 + }, + { + "epoch": 3.31, + "learning_rate": "1.3094e-04", + "loss": 0.5884, + "slid_loss": 0.6518, + "step": 3446, + "time": 13.13 + }, + { + "epoch": 3.31, + "learning_rate": "1.3091e-04", + "loss": 0.5456, + "slid_loss": 0.6514, + "step": 3447, + "time": 13.59 + }, + { + "epoch": 3.31, + "learning_rate": "1.3088e-04", + "loss": 0.5895, + "slid_loss": 0.6508, + "step": 3448, + "time": 14.6 + }, + { + "epoch": 3.31, + "learning_rate": "1.3085e-04", + "loss": 0.6834, + "slid_loss": 0.6515, + "step": 3449, + "time": 14.53 + }, + { + "epoch": 3.31, + "learning_rate": "1.3082e-04", + "loss": 0.6749, + "slid_loss": 0.6513, + "step": 3450, + "time": 13.27 + }, + { + "epoch": 3.32, + "learning_rate": "1.3079e-04", + "loss": 0.6424, + 
"slid_loss": 0.651, + "step": 3451, + "time": 11.83 + }, + { + "epoch": 3.32, + "learning_rate": "1.3076e-04", + "loss": 0.6163, + "slid_loss": 0.6502, + "step": 3452, + "time": 12.81 + }, + { + "epoch": 3.32, + "learning_rate": "1.3073e-04", + "loss": 0.6891, + "slid_loss": 0.6506, + "step": 3453, + "time": 12.36 + }, + { + "epoch": 3.32, + "learning_rate": "1.3070e-04", + "loss": 0.7219, + "slid_loss": 0.6502, + "step": 3454, + "time": 13.8 + }, + { + "epoch": 3.32, + "learning_rate": "1.3066e-04", + "loss": 0.5976, + "slid_loss": 0.6484, + "step": 3455, + "time": 12.75 + }, + { + "epoch": 3.32, + "learning_rate": "1.3063e-04", + "loss": 0.6415, + "slid_loss": 0.6483, + "step": 3456, + "time": 13.41 + }, + { + "epoch": 3.32, + "learning_rate": "1.3060e-04", + "loss": 0.5952, + "slid_loss": 0.647, + "step": 3457, + "time": 12.77 + }, + { + "epoch": 3.32, + "learning_rate": "1.3057e-04", + "loss": 0.6398, + "slid_loss": 0.6478, + "step": 3458, + "time": 11.73 + }, + { + "epoch": 3.32, + "learning_rate": "1.3054e-04", + "loss": 0.6103, + "slid_loss": 0.6467, + "step": 3459, + "time": 13.61 + }, + { + "epoch": 3.32, + "learning_rate": "1.3051e-04", + "loss": 0.7194, + "slid_loss": 0.6467, + "step": 3460, + "time": 13.21 + }, + { + "epoch": 3.32, + "learning_rate": "1.3048e-04", + "loss": 0.6607, + "slid_loss": 0.6467, + "step": 3461, + "time": 12.93 + }, + { + "epoch": 3.33, + "learning_rate": "1.3045e-04", + "loss": 0.7086, + "slid_loss": 0.6482, + "step": 3462, + "time": 13.78 + }, + { + "epoch": 3.33, + "learning_rate": "1.3042e-04", + "loss": 0.6653, + "slid_loss": 0.648, + "step": 3463, + "time": 12.97 + }, + { + "epoch": 3.33, + "learning_rate": "1.3039e-04", + "loss": 0.6694, + "slid_loss": 0.6482, + "step": 3464, + "time": 13.54 + }, + { + "epoch": 3.33, + "learning_rate": "1.3036e-04", + "loss": 0.6441, + "slid_loss": 0.6478, + "step": 3465, + "time": 11.03 + }, + { + "epoch": 3.33, + "learning_rate": "1.3032e-04", + "loss": 0.585, + "slid_loss": 0.6477, + 
"step": 3466, + "time": 13.75 + }, + { + "epoch": 3.33, + "learning_rate": "1.3029e-04", + "loss": 0.6171, + "slid_loss": 0.6476, + "step": 3467, + "time": 13.13 + }, + { + "epoch": 3.33, + "learning_rate": "1.3026e-04", + "loss": 0.6303, + "slid_loss": 0.6474, + "step": 3468, + "time": 14.66 + }, + { + "epoch": 3.33, + "learning_rate": "1.3023e-04", + "loss": 0.4937, + "slid_loss": 0.6458, + "step": 3469, + "time": 13.61 + }, + { + "epoch": 3.33, + "learning_rate": "1.3020e-04", + "loss": 0.6803, + "slid_loss": 0.6465, + "step": 3470, + "time": 13.28 + }, + { + "epoch": 3.33, + "learning_rate": "1.3017e-04", + "loss": 0.6014, + "slid_loss": 0.6454, + "step": 3471, + "time": 13.64 + }, + { + "epoch": 3.34, + "learning_rate": "1.3014e-04", + "loss": 0.7617, + "slid_loss": 0.646, + "step": 3472, + "time": 12.31 + }, + { + "epoch": 3.34, + "learning_rate": "1.3011e-04", + "loss": 0.6268, + "slid_loss": 0.6461, + "step": 3473, + "time": 13.18 + }, + { + "epoch": 3.34, + "learning_rate": "1.3008e-04", + "loss": 0.5907, + "slid_loss": 0.6452, + "step": 3474, + "time": 12.21 + }, + { + "epoch": 3.34, + "learning_rate": "1.3005e-04", + "loss": 0.5914, + "slid_loss": 0.6443, + "step": 3475, + "time": 13.45 + }, + { + "epoch": 3.34, + "learning_rate": "1.3002e-04", + "loss": 0.7348, + "slid_loss": 0.6452, + "step": 3476, + "time": 13.4 + }, + { + "epoch": 3.34, + "learning_rate": "1.2999e-04", + "loss": 0.5711, + "slid_loss": 0.6439, + "step": 3477, + "time": 13.59 + }, + { + "epoch": 3.34, + "learning_rate": "1.2996e-04", + "loss": 0.6246, + "slid_loss": 0.6438, + "step": 3478, + "time": 13.92 + }, + { + "epoch": 3.34, + "learning_rate": "1.2992e-04", + "loss": 0.7258, + "slid_loss": 0.6447, + "step": 3479, + "time": 14.25 + }, + { + "epoch": 3.34, + "learning_rate": "1.2989e-04", + "loss": 0.6603, + "slid_loss": 0.6447, + "step": 3480, + "time": 14.25 + }, + { + "epoch": 3.34, + "learning_rate": "1.2986e-04", + "loss": 0.5744, + "slid_loss": 0.6449, + "step": 3481, + 
"time": 14.26 + }, + { + "epoch": 3.34, + "learning_rate": "1.2983e-04", + "loss": 0.6549, + "slid_loss": 0.6445, + "step": 3482, + "time": 12.84 + }, + { + "epoch": 3.35, + "learning_rate": "1.2980e-04", + "loss": 0.5536, + "slid_loss": 0.6434, + "step": 3483, + "time": 14.12 + }, + { + "epoch": 3.35, + "learning_rate": "1.2977e-04", + "loss": 0.7199, + "slid_loss": 0.6452, + "step": 3484, + "time": 13.78 + }, + { + "epoch": 3.35, + "learning_rate": "1.2974e-04", + "loss": 0.6321, + "slid_loss": 0.6455, + "step": 3485, + "time": 12.06 + }, + { + "epoch": 3.35, + "learning_rate": "1.2971e-04", + "loss": 0.6989, + "slid_loss": 0.6454, + "step": 3486, + "time": 11.6 + }, + { + "epoch": 3.35, + "learning_rate": "1.2968e-04", + "loss": 0.6838, + "slid_loss": 0.6454, + "step": 3487, + "time": 12.13 + }, + { + "epoch": 3.35, + "learning_rate": "1.2965e-04", + "loss": 0.5594, + "slid_loss": 0.6445, + "step": 3488, + "time": 12.8 + }, + { + "epoch": 3.35, + "learning_rate": "1.2962e-04", + "loss": 0.7626, + "slid_loss": 0.6455, + "step": 3489, + "time": 11.77 + }, + { + "epoch": 3.35, + "learning_rate": "1.2959e-04", + "loss": 0.7284, + "slid_loss": 0.6471, + "step": 3490, + "time": 13.99 + }, + { + "epoch": 3.35, + "learning_rate": "1.2956e-04", + "loss": 0.5864, + "slid_loss": 0.6456, + "step": 3491, + "time": 13.27 + }, + { + "epoch": 3.35, + "learning_rate": "1.2953e-04", + "loss": 0.6521, + "slid_loss": 0.6451, + "step": 3492, + "time": 14.04 + }, + { + "epoch": 3.36, + "learning_rate": "1.2950e-04", + "loss": 0.6679, + "slid_loss": 0.6453, + "step": 3493, + "time": 14.05 + }, + { + "epoch": 3.36, + "learning_rate": "1.2947e-04", + "loss": 0.6603, + "slid_loss": 0.6455, + "step": 3494, + "time": 13.55 + }, + { + "epoch": 3.36, + "learning_rate": "1.2943e-04", + "loss": 0.6619, + "slid_loss": 0.6454, + "step": 3495, + "time": 13.17 + }, + { + "epoch": 3.36, + "learning_rate": "1.2940e-04", + "loss": 0.6787, + "slid_loss": 0.6449, + "step": 3496, + "time": 13.31 + }, + 
{ + "epoch": 3.36, + "learning_rate": "1.2937e-04", + "loss": 0.55, + "slid_loss": 0.6444, + "step": 3497, + "time": 11.81 + }, + { + "epoch": 3.36, + "learning_rate": "1.2934e-04", + "loss": 0.6663, + "slid_loss": 0.6443, + "step": 3498, + "time": 13.55 + }, + { + "epoch": 3.36, + "learning_rate": "1.2931e-04", + "loss": 0.6003, + "slid_loss": 0.6447, + "step": 3499, + "time": 13.9 + }, + { + "epoch": 3.36, + "learning_rate": "1.2928e-04", + "loss": 0.6255, + "slid_loss": 0.6454, + "step": 3500, + "time": 12.19 + }, + { + "epoch": 3.36, + "learning_rate": "1.2925e-04", + "loss": 0.7323, + "slid_loss": 0.6462, + "step": 3501, + "time": 12.65 + }, + { + "epoch": 3.36, + "learning_rate": "1.2922e-04", + "loss": 0.6841, + "slid_loss": 0.6464, + "step": 3502, + "time": 13.95 + }, + { + "epoch": 3.37, + "learning_rate": "1.2919e-04", + "loss": 0.6956, + "slid_loss": 0.6471, + "step": 3503, + "time": 13.21 + }, + { + "epoch": 3.37, + "learning_rate": "1.2916e-04", + "loss": 0.5467, + "slid_loss": 0.6455, + "step": 3504, + "time": 13.5 + }, + { + "epoch": 3.37, + "learning_rate": "1.2913e-04", + "loss": 0.6758, + "slid_loss": 0.6464, + "step": 3505, + "time": 13.75 + }, + { + "epoch": 3.37, + "learning_rate": "1.2910e-04", + "loss": 0.669, + "slid_loss": 0.6463, + "step": 3506, + "time": 13.45 + }, + { + "epoch": 3.37, + "learning_rate": "1.2907e-04", + "loss": 0.6283, + "slid_loss": 0.6464, + "step": 3507, + "time": 13.23 + }, + { + "epoch": 3.37, + "learning_rate": "1.2904e-04", + "loss": 0.681, + "slid_loss": 0.6466, + "step": 3508, + "time": 11.13 + }, + { + "epoch": 3.37, + "learning_rate": "1.2901e-04", + "loss": 0.6405, + "slid_loss": 0.6462, + "step": 3509, + "time": 13.62 + }, + { + "epoch": 3.37, + "learning_rate": "1.2898e-04", + "loss": 0.6772, + "slid_loss": 0.6462, + "step": 3510, + "time": 12.84 + }, + { + "epoch": 3.37, + "learning_rate": "1.2895e-04", + "loss": 0.688, + "slid_loss": 0.6472, + "step": 3511, + "time": 13.25 + }, + { + "epoch": 3.37, + 
"learning_rate": "1.2892e-04", + "loss": 0.6511, + "slid_loss": 0.647, + "step": 3512, + "time": 12.77 + }, + { + "epoch": 3.37, + "learning_rate": "1.2889e-04", + "loss": 0.6843, + "slid_loss": 0.6475, + "step": 3513, + "time": 14.25 + }, + { + "epoch": 3.38, + "learning_rate": "1.2886e-04", + "loss": 0.6292, + "slid_loss": 0.6474, + "step": 3514, + "time": 11.59 + }, + { + "epoch": 3.38, + "learning_rate": "1.2883e-04", + "loss": 0.6, + "slid_loss": 0.6469, + "step": 3515, + "time": 13.15 + }, + { + "epoch": 3.38, + "learning_rate": "1.2879e-04", + "loss": 0.6872, + "slid_loss": 0.6481, + "step": 3516, + "time": 12.88 + }, + { + "epoch": 3.38, + "learning_rate": "1.2876e-04", + "loss": 0.58, + "slid_loss": 0.6476, + "step": 3517, + "time": 13.24 + }, + { + "epoch": 3.38, + "learning_rate": "1.2873e-04", + "loss": 0.6752, + "slid_loss": 0.6472, + "step": 3518, + "time": 12.87 + }, + { + "epoch": 3.38, + "learning_rate": "1.2870e-04", + "loss": 0.628, + "slid_loss": 0.6469, + "step": 3519, + "time": 12.17 + }, + { + "epoch": 3.38, + "learning_rate": "1.2867e-04", + "loss": 0.5855, + "slid_loss": 0.6467, + "step": 3520, + "time": 13.97 + }, + { + "epoch": 3.38, + "learning_rate": "1.2864e-04", + "loss": 0.6558, + "slid_loss": 0.6468, + "step": 3521, + "time": 13.16 + }, + { + "epoch": 3.38, + "learning_rate": "1.2861e-04", + "loss": 0.6584, + "slid_loss": 0.6466, + "step": 3522, + "time": 11.02 + }, + { + "epoch": 3.38, + "learning_rate": "1.2858e-04", + "loss": 0.5994, + "slid_loss": 0.6457, + "step": 3523, + "time": 12.43 + }, + { + "epoch": 3.39, + "learning_rate": "1.2855e-04", + "loss": 0.7109, + "slid_loss": 0.6458, + "step": 3524, + "time": 11.62 + }, + { + "epoch": 3.39, + "learning_rate": "1.2852e-04", + "loss": 0.6493, + "slid_loss": 0.6462, + "step": 3525, + "time": 12.71 + }, + { + "epoch": 3.39, + "learning_rate": "1.2849e-04", + "loss": 0.7536, + "slid_loss": 0.6478, + "step": 3526, + "time": 12.29 + }, + { + "epoch": 3.39, + "learning_rate": 
"1.2846e-04", + "loss": 0.5932, + "slid_loss": 0.6478, + "step": 3527, + "time": 13.61 + }, + { + "epoch": 3.39, + "learning_rate": "1.2843e-04", + "loss": 0.5796, + "slid_loss": 0.647, + "step": 3528, + "time": 13.47 + }, + { + "epoch": 3.39, + "learning_rate": "1.2840e-04", + "loss": 0.6818, + "slid_loss": 0.6479, + "step": 3529, + "time": 11.98 + }, + { + "epoch": 3.39, + "learning_rate": "1.2837e-04", + "loss": 0.6215, + "slid_loss": 0.6474, + "step": 3530, + "time": 13.55 + }, + { + "epoch": 3.39, + "learning_rate": "1.2834e-04", + "loss": 0.6867, + "slid_loss": 0.6475, + "step": 3531, + "time": 13.39 + }, + { + "epoch": 3.39, + "learning_rate": "1.2831e-04", + "loss": 0.6833, + "slid_loss": 0.6476, + "step": 3532, + "time": 13.06 + }, + { + "epoch": 3.39, + "learning_rate": "1.2828e-04", + "loss": 0.666, + "slid_loss": 0.6473, + "step": 3533, + "time": 13.91 + }, + { + "epoch": 3.39, + "learning_rate": "1.2825e-04", + "loss": 0.6796, + "slid_loss": 0.6477, + "step": 3534, + "time": 11.25 + }, + { + "epoch": 3.4, + "learning_rate": "1.2822e-04", + "loss": 0.6454, + "slid_loss": 0.6468, + "step": 3535, + "time": 11.66 + }, + { + "epoch": 3.4, + "learning_rate": "1.2819e-04", + "loss": 0.6689, + "slid_loss": 0.6477, + "step": 3536, + "time": 11.97 + }, + { + "epoch": 3.4, + "learning_rate": "1.2816e-04", + "loss": 0.6349, + "slid_loss": 0.6471, + "step": 3537, + "time": 13.73 + }, + { + "epoch": 3.4, + "learning_rate": "1.2813e-04", + "loss": 0.6409, + "slid_loss": 0.6464, + "step": 3538, + "time": 12.1 + }, + { + "epoch": 3.4, + "learning_rate": "1.2810e-04", + "loss": 0.5303, + "slid_loss": 0.6458, + "step": 3539, + "time": 12.94 + }, + { + "epoch": 3.4, + "learning_rate": "1.2807e-04", + "loss": 0.6753, + "slid_loss": 0.6461, + "step": 3540, + "time": 13.9 + }, + { + "epoch": 3.4, + "learning_rate": "1.2804e-04", + "loss": 0.5645, + "slid_loss": 0.6453, + "step": 3541, + "time": 12.02 + }, + { + "epoch": 3.4, + "learning_rate": "1.2801e-04", + "loss": 0.6181, 
+ "slid_loss": 0.6446, + "step": 3542, + "time": 13.41 + }, + { + "epoch": 3.4, + "learning_rate": "1.2798e-04", + "loss": 0.646, + "slid_loss": 0.6446, + "step": 3543, + "time": 13.74 + }, + { + "epoch": 3.4, + "learning_rate": "1.2795e-04", + "loss": 0.7002, + "slid_loss": 0.6454, + "step": 3544, + "time": 13.46 + }, + { + "epoch": 3.41, + "learning_rate": "1.2792e-04", + "loss": 0.553, + "slid_loss": 0.6445, + "step": 3545, + "time": 14.01 + }, + { + "epoch": 3.41, + "learning_rate": "1.2789e-04", + "loss": 0.559, + "slid_loss": 0.6442, + "step": 3546, + "time": 12.87 + }, + { + "epoch": 3.41, + "learning_rate": "1.2786e-04", + "loss": 0.6336, + "slid_loss": 0.6451, + "step": 3547, + "time": 13.24 + }, + { + "epoch": 3.41, + "learning_rate": "1.2783e-04", + "loss": 0.6509, + "slid_loss": 0.6457, + "step": 3548, + "time": 13.55 + }, + { + "epoch": 3.41, + "learning_rate": "1.2780e-04", + "loss": 0.5755, + "slid_loss": 0.6446, + "step": 3549, + "time": 13.39 + }, + { + "epoch": 3.41, + "learning_rate": "1.2777e-04", + "loss": 0.6299, + "slid_loss": 0.6442, + "step": 3550, + "time": 14.23 + }, + { + "epoch": 3.41, + "learning_rate": "1.2774e-04", + "loss": 0.6328, + "slid_loss": 0.6441, + "step": 3551, + "time": 12.79 + }, + { + "epoch": 3.41, + "learning_rate": "1.2771e-04", + "loss": 0.652, + "slid_loss": 0.6444, + "step": 3552, + "time": 12.41 + }, + { + "epoch": 3.41, + "learning_rate": "1.2768e-04", + "loss": 0.5931, + "slid_loss": 0.6435, + "step": 3553, + "time": 11.94 + }, + { + "epoch": 3.41, + "learning_rate": "1.2765e-04", + "loss": 0.6126, + "slid_loss": 0.6424, + "step": 3554, + "time": 12.12 + }, + { + "epoch": 3.41, + "learning_rate": "1.2762e-04", + "loss": 0.6667, + "slid_loss": 0.6431, + "step": 3555, + "time": 13.81 + }, + { + "epoch": 3.42, + "learning_rate": "1.2759e-04", + "loss": 0.547, + "slid_loss": 0.6421, + "step": 3556, + "time": 12.46 + }, + { + "epoch": 3.42, + "learning_rate": "1.2756e-04", + "loss": 0.5948, + "slid_loss": 0.6421, + 
"step": 3557, + "time": 13.03 + }, + { + "epoch": 3.42, + "learning_rate": "1.2753e-04", + "loss": 0.5367, + "slid_loss": 0.6411, + "step": 3558, + "time": 13.26 + }, + { + "epoch": 3.42, + "learning_rate": "1.2750e-04", + "loss": 0.6905, + "slid_loss": 0.6419, + "step": 3559, + "time": 14.3 + }, + { + "epoch": 3.42, + "learning_rate": "1.2747e-04", + "loss": 0.6102, + "slid_loss": 0.6408, + "step": 3560, + "time": 13.41 + }, + { + "epoch": 3.42, + "learning_rate": "1.2744e-04", + "loss": 0.641, + "slid_loss": 0.6406, + "step": 3561, + "time": 12.96 + }, + { + "epoch": 3.42, + "learning_rate": "1.2741e-04", + "loss": 0.7308, + "slid_loss": 0.6408, + "step": 3562, + "time": 13.07 + }, + { + "epoch": 3.42, + "learning_rate": "1.2738e-04", + "loss": 0.6055, + "slid_loss": 0.6402, + "step": 3563, + "time": 13.41 + }, + { + "epoch": 3.42, + "learning_rate": "1.2735e-04", + "loss": 0.6004, + "slid_loss": 0.6395, + "step": 3564, + "time": 13.28 + }, + { + "epoch": 3.42, + "learning_rate": "1.2732e-04", + "loss": 0.6324, + "slid_loss": 0.6394, + "step": 3565, + "time": 13.25 + }, + { + "epoch": 3.43, + "learning_rate": "1.2729e-04", + "loss": 0.7346, + "slid_loss": 0.6409, + "step": 3566, + "time": 13.35 + }, + { + "epoch": 3.43, + "learning_rate": "1.2726e-04", + "loss": 0.5746, + "slid_loss": 0.6405, + "step": 3567, + "time": 13.3 + }, + { + "epoch": 3.43, + "learning_rate": "1.2723e-04", + "loss": 0.683, + "slid_loss": 0.641, + "step": 3568, + "time": 13.18 + }, + { + "epoch": 3.43, + "learning_rate": "1.2720e-04", + "loss": 0.7055, + "slid_loss": 0.6431, + "step": 3569, + "time": 13.3 + }, + { + "epoch": 3.43, + "learning_rate": "1.2717e-04", + "loss": 0.5434, + "slid_loss": 0.6418, + "step": 3570, + "time": 13.54 + }, + { + "epoch": 3.43, + "learning_rate": "1.2714e-04", + "loss": 0.7103, + "slid_loss": 0.6429, + "step": 3571, + "time": 13.42 + }, + { + "epoch": 3.43, + "learning_rate": "1.2711e-04", + "loss": 0.6043, + "slid_loss": 0.6413, + "step": 3572, + "time": 
10.97 + }, + { + "epoch": 3.43, + "learning_rate": "1.2708e-04", + "loss": 0.5866, + "slid_loss": 0.6409, + "step": 3573, + "time": 14.03 + }, + { + "epoch": 3.43, + "learning_rate": "1.2705e-04", + "loss": 0.6503, + "slid_loss": 0.6415, + "step": 3574, + "time": 12.86 + }, + { + "epoch": 3.43, + "learning_rate": "1.2702e-04", + "loss": 0.6379, + "slid_loss": 0.642, + "step": 3575, + "time": 13.66 + }, + { + "epoch": 3.44, + "learning_rate": "1.2699e-04", + "loss": 0.6156, + "slid_loss": 0.6408, + "step": 3576, + "time": 12.8 + }, + { + "epoch": 3.44, + "learning_rate": "1.2696e-04", + "loss": 0.6556, + "slid_loss": 0.6416, + "step": 3577, + "time": 11.37 + }, + { + "epoch": 3.44, + "learning_rate": "1.2693e-04", + "loss": 0.5855, + "slid_loss": 0.6412, + "step": 3578, + "time": 13.44 + }, + { + "epoch": 3.44, + "learning_rate": "1.2690e-04", + "loss": 0.5914, + "slid_loss": 0.6399, + "step": 3579, + "time": 12.26 + }, + { + "epoch": 3.44, + "learning_rate": "1.2687e-04", + "loss": 0.7214, + "slid_loss": 0.6405, + "step": 3580, + "time": 13.29 + }, + { + "epoch": 3.44, + "learning_rate": "1.2684e-04", + "loss": 0.6035, + "slid_loss": 0.6408, + "step": 3581, + "time": 11.65 + }, + { + "epoch": 3.44, + "learning_rate": "1.2681e-04", + "loss": 0.6409, + "slid_loss": 0.6406, + "step": 3582, + "time": 12.9 + }, + { + "epoch": 3.44, + "learning_rate": "1.2678e-04", + "loss": 0.6081, + "slid_loss": 0.6412, + "step": 3583, + "time": 13.67 + }, + { + "epoch": 3.44, + "learning_rate": "1.2675e-04", + "loss": 0.6786, + "slid_loss": 0.6408, + "step": 3584, + "time": 13.15 + }, + { + "epoch": 3.44, + "learning_rate": "1.2672e-04", + "loss": 0.6357, + "slid_loss": 0.6408, + "step": 3585, + "time": 13.39 + }, + { + "epoch": 3.44, + "learning_rate": "1.2669e-04", + "loss": 0.7018, + "slid_loss": 0.6408, + "step": 3586, + "time": 14.03 + }, + { + "epoch": 3.45, + "learning_rate": "1.2666e-04", + "loss": 0.5928, + "slid_loss": 0.6399, + "step": 3587, + "time": 12.91 + }, + { + 
"epoch": 3.45, + "learning_rate": "1.2663e-04", + "loss": 0.6409, + "slid_loss": 0.6407, + "step": 3588, + "time": 14.53 + }, + { + "epoch": 3.45, + "learning_rate": "1.2660e-04", + "loss": 0.5577, + "slid_loss": 0.6387, + "step": 3589, + "time": 12.78 + }, + { + "epoch": 3.45, + "learning_rate": "1.2657e-04", + "loss": 0.6085, + "slid_loss": 0.6375, + "step": 3590, + "time": 13.24 + }, + { + "epoch": 3.45, + "learning_rate": "1.2654e-04", + "loss": 0.6134, + "slid_loss": 0.6378, + "step": 3591, + "time": 13.89 + }, + { + "epoch": 3.45, + "learning_rate": "1.2652e-04", + "loss": 0.6011, + "slid_loss": 0.6372, + "step": 3592, + "time": 12.05 + }, + { + "epoch": 3.45, + "learning_rate": "1.2649e-04", + "loss": 0.6214, + "slid_loss": 0.6368, + "step": 3593, + "time": 11.53 + }, + { + "epoch": 3.45, + "learning_rate": "1.2646e-04", + "loss": 0.6803, + "slid_loss": 0.637, + "step": 3594, + "time": 12.28 + }, + { + "epoch": 3.45, + "learning_rate": "1.2643e-04", + "loss": 0.5855, + "slid_loss": 0.6362, + "step": 3595, + "time": 12.9 + }, + { + "epoch": 3.45, + "learning_rate": "1.2640e-04", + "loss": 0.6124, + "slid_loss": 0.6356, + "step": 3596, + "time": 13.04 + }, + { + "epoch": 3.46, + "learning_rate": "1.2637e-04", + "loss": 0.5724, + "slid_loss": 0.6358, + "step": 3597, + "time": 13.99 + }, + { + "epoch": 3.46, + "learning_rate": "1.2634e-04", + "loss": 0.6265, + "slid_loss": 0.6354, + "step": 3598, + "time": 12.68 + }, + { + "epoch": 3.46, + "learning_rate": "1.2631e-04", + "loss": 0.674, + "slid_loss": 0.6361, + "step": 3599, + "time": 13.8 + }, + { + "epoch": 3.46, + "learning_rate": "1.2628e-04", + "loss": 0.6898, + "slid_loss": 0.6368, + "step": 3600, + "time": 11.52 + }, + { + "epoch": 3.46, + "learning_rate": "1.2625e-04", + "loss": 0.6118, + "slid_loss": 0.6356, + "step": 3601, + "time": 13.38 + }, + { + "epoch": 3.46, + "learning_rate": "1.2622e-04", + "loss": 0.5733, + "slid_loss": 0.6344, + "step": 3602, + "time": 11.86 + }, + { + "epoch": 3.46, + 
"learning_rate": "1.2619e-04", + "loss": 0.5743, + "slid_loss": 0.6332, + "step": 3603, + "time": 13.39 + }, + { + "epoch": 3.46, + "learning_rate": "1.2616e-04", + "loss": 0.6987, + "slid_loss": 0.6348, + "step": 3604, + "time": 13.15 + }, + { + "epoch": 3.46, + "learning_rate": "1.2613e-04", + "loss": 0.6988, + "slid_loss": 0.635, + "step": 3605, + "time": 13.96 + }, + { + "epoch": 3.46, + "learning_rate": "1.2610e-04", + "loss": 0.6629, + "slid_loss": 0.6349, + "step": 3606, + "time": 12.89 + }, + { + "epoch": 3.46, + "learning_rate": "1.2607e-04", + "loss": 0.6779, + "slid_loss": 0.6354, + "step": 3607, + "time": 14.14 + }, + { + "epoch": 3.47, + "learning_rate": "1.2604e-04", + "loss": 0.6764, + "slid_loss": 0.6354, + "step": 3608, + "time": 13.97 + }, + { + "epoch": 3.47, + "learning_rate": "1.2601e-04", + "loss": 0.6801, + "slid_loss": 0.6358, + "step": 3609, + "time": 12.98 + }, + { + "epoch": 3.47, + "learning_rate": "1.2598e-04", + "loss": 0.6252, + "slid_loss": 0.6353, + "step": 3610, + "time": 13.15 + }, + { + "epoch": 3.47, + "learning_rate": "1.2595e-04", + "loss": 0.7153, + "slid_loss": 0.6355, + "step": 3611, + "time": 13.93 + }, + { + "epoch": 3.47, + "learning_rate": "1.2593e-04", + "loss": 0.6147, + "slid_loss": 0.6352, + "step": 3612, + "time": 13.81 + }, + { + "epoch": 3.47, + "learning_rate": "1.2590e-04", + "loss": 0.6994, + "slid_loss": 0.6353, + "step": 3613, + "time": 13.22 + }, + { + "epoch": 3.47, + "learning_rate": "1.2587e-04", + "loss": 0.563, + "slid_loss": 0.6346, + "step": 3614, + "time": 13.78 + }, + { + "epoch": 3.47, + "learning_rate": "1.2584e-04", + "loss": 0.6523, + "slid_loss": 0.6352, + "step": 3615, + "time": 11.88 + }, + { + "epoch": 3.47, + "learning_rate": "1.2581e-04", + "loss": 0.6654, + "slid_loss": 0.635, + "step": 3616, + "time": 13.63 + }, + { + "epoch": 3.47, + "learning_rate": "1.2578e-04", + "loss": 0.6297, + "slid_loss": 0.6355, + "step": 3617, + "time": 12.13 + }, + { + "epoch": 3.48, + "learning_rate": 
"1.2575e-04", + "loss": 0.5472, + "slid_loss": 0.6342, + "step": 3618, + "time": 12.92 + }, + { + "epoch": 3.48, + "learning_rate": "1.2572e-04", + "loss": 0.5821, + "slid_loss": 0.6337, + "step": 3619, + "time": 11.27 + }, + { + "epoch": 3.48, + "learning_rate": "1.2569e-04", + "loss": 0.6775, + "slid_loss": 0.6346, + "step": 3620, + "time": 13.49 + }, + { + "epoch": 3.48, + "learning_rate": "1.2566e-04", + "loss": 0.5538, + "slid_loss": 0.6336, + "step": 3621, + "time": 13.15 + }, + { + "epoch": 3.48, + "learning_rate": "1.2563e-04", + "loss": 0.7229, + "slid_loss": 0.6343, + "step": 3622, + "time": 13.33 + }, + { + "epoch": 3.48, + "learning_rate": "1.2560e-04", + "loss": 0.7131, + "slid_loss": 0.6354, + "step": 3623, + "time": 13.49 + }, + { + "epoch": 3.48, + "learning_rate": "1.2557e-04", + "loss": 0.6274, + "slid_loss": 0.6346, + "step": 3624, + "time": 13.4 + }, + { + "epoch": 3.48, + "learning_rate": "1.2554e-04", + "loss": 0.6581, + "slid_loss": 0.6346, + "step": 3625, + "time": 13.51 + }, + { + "epoch": 3.48, + "learning_rate": "1.2551e-04", + "loss": 0.6739, + "slid_loss": 0.6338, + "step": 3626, + "time": 13.99 + }, + { + "epoch": 3.48, + "learning_rate": "1.2549e-04", + "loss": 0.6736, + "slid_loss": 0.6347, + "step": 3627, + "time": 12.33 + }, + { + "epoch": 3.49, + "learning_rate": "1.2546e-04", + "loss": 0.6485, + "slid_loss": 0.6353, + "step": 3628, + "time": 14.18 + }, + { + "epoch": 3.49, + "learning_rate": "1.2543e-04", + "loss": 0.6916, + "slid_loss": 0.6354, + "step": 3629, + "time": 12.03 + }, + { + "epoch": 3.49, + "learning_rate": "1.2540e-04", + "loss": 0.623, + "slid_loss": 0.6355, + "step": 3630, + "time": 13.25 + }, + { + "epoch": 3.49, + "learning_rate": "1.2537e-04", + "loss": 0.6181, + "slid_loss": 0.6348, + "step": 3631, + "time": 13.28 + }, + { + "epoch": 3.49, + "learning_rate": "1.2534e-04", + "loss": 0.7341, + "slid_loss": 0.6353, + "step": 3632, + "time": 13.3 + }, + { + "epoch": 3.49, + "learning_rate": "1.2531e-04", + 
"loss": 0.6094, + "slid_loss": 0.6347, + "step": 3633, + "time": 13.23 + }, + { + "epoch": 3.49, + "learning_rate": "1.2528e-04", + "loss": 0.6378, + "slid_loss": 0.6343, + "step": 3634, + "time": 13.37 + }, + { + "epoch": 3.49, + "learning_rate": "1.2525e-04", + "loss": 0.5685, + "slid_loss": 0.6335, + "step": 3635, + "time": 10.91 + }, + { + "epoch": 3.49, + "learning_rate": "1.2522e-04", + "loss": 0.7015, + "slid_loss": 0.6339, + "step": 3636, + "time": 11.69 + }, + { + "epoch": 3.49, + "learning_rate": "1.2519e-04", + "loss": 0.5585, + "slid_loss": 0.6331, + "step": 3637, + "time": 12.72 + }, + { + "epoch": 3.49, + "learning_rate": "1.2516e-04", + "loss": 0.6683, + "slid_loss": 0.6334, + "step": 3638, + "time": 11.48 + }, + { + "epoch": 3.5, + "learning_rate": "1.2514e-04", + "loss": 0.6163, + "slid_loss": 0.6342, + "step": 3639, + "time": 13.91 + }, + { + "epoch": 3.5, + "learning_rate": "1.2511e-04", + "loss": 0.6022, + "slid_loss": 0.6335, + "step": 3640, + "time": 12.93 + }, + { + "epoch": 3.5, + "learning_rate": "1.2508e-04", + "loss": 0.6531, + "slid_loss": 0.6344, + "step": 3641, + "time": 13.4 + }, + { + "epoch": 3.5, + "learning_rate": "1.2505e-04", + "loss": 0.5738, + "slid_loss": 0.6339, + "step": 3642, + "time": 13.89 + }, + { + "epoch": 3.5, + "learning_rate": "1.2502e-04", + "loss": 0.6652, + "slid_loss": 0.6341, + "step": 3643, + "time": 11.3 + }, + { + "epoch": 3.5, + "learning_rate": "1.2499e-04", + "loss": 0.6867, + "slid_loss": 0.634, + "step": 3644, + "time": 13.78 + }, + { + "epoch": 3.5, + "learning_rate": "1.2496e-04", + "loss": 0.6619, + "slid_loss": 0.6351, + "step": 3645, + "time": 13.24 + }, + { + "epoch": 3.5, + "learning_rate": "1.2493e-04", + "loss": 0.8242, + "slid_loss": 0.6377, + "step": 3646, + "time": 14.06 + }, + { + "epoch": 3.5, + "learning_rate": "1.2490e-04", + "loss": 0.5785, + "slid_loss": 0.6372, + "step": 3647, + "time": 13.32 + }, + { + "epoch": 3.5, + "learning_rate": "1.2487e-04", + "loss": 0.6673, + "slid_loss": 
0.6373, + "step": 3648, + "time": 13.52 + }, + { + "epoch": 3.51, + "learning_rate": "1.2485e-04", + "loss": 0.7464, + "slid_loss": 0.6391, + "step": 3649, + "time": 12.13 + }, + { + "epoch": 3.51, + "learning_rate": "1.2482e-04", + "loss": 0.6248, + "slid_loss": 0.639, + "step": 3650, + "time": 13.26 + }, + { + "epoch": 3.51, + "learning_rate": "1.2479e-04", + "loss": 0.6646, + "slid_loss": 0.6393, + "step": 3651, + "time": 13.39 + }, + { + "epoch": 3.51, + "learning_rate": "1.2476e-04", + "loss": 0.6298, + "slid_loss": 0.6391, + "step": 3652, + "time": 13.26 + }, + { + "epoch": 3.51, + "learning_rate": "1.2473e-04", + "loss": 0.5877, + "slid_loss": 0.639, + "step": 3653, + "time": 13.19 + }, + { + "epoch": 3.51, + "learning_rate": "1.2470e-04", + "loss": 0.6441, + "slid_loss": 0.6394, + "step": 3654, + "time": 13.41 + }, + { + "epoch": 3.51, + "learning_rate": "1.2467e-04", + "loss": 0.7023, + "slid_loss": 0.6397, + "step": 3655, + "time": 12.45 + }, + { + "epoch": 3.51, + "learning_rate": "1.2464e-04", + "loss": 0.646, + "slid_loss": 0.6407, + "step": 3656, + "time": 13.68 + }, + { + "epoch": 3.51, + "learning_rate": "1.2461e-04", + "loss": 0.6667, + "slid_loss": 0.6414, + "step": 3657, + "time": 13.72 + }, + { + "epoch": 3.51, + "learning_rate": "1.2458e-04", + "loss": 0.5873, + "slid_loss": 0.6419, + "step": 3658, + "time": 13.17 + }, + { + "epoch": 3.51, + "learning_rate": "1.2456e-04", + "loss": 0.6373, + "slid_loss": 0.6414, + "step": 3659, + "time": 13.43 + }, + { + "epoch": 3.52, + "learning_rate": "1.2453e-04", + "loss": 0.5847, + "slid_loss": 0.6411, + "step": 3660, + "time": 11.32 + }, + { + "epoch": 3.52, + "learning_rate": "1.2450e-04", + "loss": 0.602, + "slid_loss": 0.6408, + "step": 3661, + "time": 12.84 + }, + { + "epoch": 3.52, + "learning_rate": "1.2447e-04", + "loss": 0.6551, + "slid_loss": 0.64, + "step": 3662, + "time": 13.73 + }, + { + "epoch": 3.52, + "learning_rate": "1.2444e-04", + "loss": 0.6408, + "slid_loss": 0.6403, + "step": 3663, + 
"time": 13.31 + }, + { + "epoch": 3.52, + "learning_rate": "1.2441e-04", + "loss": 0.61, + "slid_loss": 0.6404, + "step": 3664, + "time": 11.63 + }, + { + "epoch": 3.52, + "learning_rate": "1.2438e-04", + "loss": 0.5255, + "slid_loss": 0.6394, + "step": 3665, + "time": 13.72 + }, + { + "epoch": 3.52, + "learning_rate": "1.2435e-04", + "loss": 0.6167, + "slid_loss": 0.6382, + "step": 3666, + "time": 13.21 + }, + { + "epoch": 3.52, + "learning_rate": "1.2433e-04", + "loss": 0.5792, + "slid_loss": 0.6382, + "step": 3667, + "time": 11.14 + }, + { + "epoch": 3.52, + "learning_rate": "1.2430e-04", + "loss": 0.546, + "slid_loss": 0.6369, + "step": 3668, + "time": 12.9 + }, + { + "epoch": 3.52, + "learning_rate": "1.2427e-04", + "loss": 0.6492, + "slid_loss": 0.6363, + "step": 3669, + "time": 13.45 + }, + { + "epoch": 3.53, + "learning_rate": "1.2424e-04", + "loss": 0.6068, + "slid_loss": 0.6369, + "step": 3670, + "time": 12.97 + }, + { + "epoch": 3.53, + "learning_rate": "1.2421e-04", + "loss": 0.6387, + "slid_loss": 0.6362, + "step": 3671, + "time": 11.7 + }, + { + "epoch": 3.53, + "learning_rate": "1.2418e-04", + "loss": 0.6768, + "slid_loss": 0.637, + "step": 3672, + "time": 12.7 + }, + { + "epoch": 3.53, + "learning_rate": "1.2415e-04", + "loss": 0.55, + "slid_loss": 0.6366, + "step": 3673, + "time": 13.28 + }, + { + "epoch": 3.53, + "learning_rate": "1.2412e-04", + "loss": 0.5133, + "slid_loss": 0.6352, + "step": 3674, + "time": 13.28 + }, + { + "epoch": 3.53, + "learning_rate": "1.2410e-04", + "loss": 0.6089, + "slid_loss": 0.6349, + "step": 3675, + "time": 12.82 + }, + { + "epoch": 3.53, + "learning_rate": "1.2407e-04", + "loss": 0.6283, + "slid_loss": 0.6351, + "step": 3676, + "time": 13.16 + }, + { + "epoch": 3.53, + "learning_rate": "1.2404e-04", + "loss": 0.5439, + "slid_loss": 0.6339, + "step": 3677, + "time": 14.07 + }, + { + "epoch": 3.53, + "learning_rate": "1.2401e-04", + "loss": 0.6336, + "slid_loss": 0.6344, + "step": 3678, + "time": 13.71 + }, + { + 
"epoch": 3.53, + "learning_rate": "1.2398e-04", + "loss": 0.5126, + "slid_loss": 0.6336, + "step": 3679, + "time": 13.02 + }, + { + "epoch": 3.54, + "learning_rate": "1.2395e-04", + "loss": 0.6213, + "slid_loss": 0.6326, + "step": 3680, + "time": 13.56 + }, + { + "epoch": 3.54, + "learning_rate": "1.2392e-04", + "loss": 0.6166, + "slid_loss": 0.6328, + "step": 3681, + "time": 12.93 + }, + { + "epoch": 3.54, + "learning_rate": "1.2390e-04", + "loss": 0.5478, + "slid_loss": 0.6318, + "step": 3682, + "time": 12.26 + }, + { + "epoch": 3.54, + "learning_rate": "1.2387e-04", + "loss": 0.6438, + "slid_loss": 0.6322, + "step": 3683, + "time": 13.77 + }, + { + "epoch": 3.54, + "learning_rate": "1.2384e-04", + "loss": 0.6406, + "slid_loss": 0.6318, + "step": 3684, + "time": 11.6 + }, + { + "epoch": 3.54, + "learning_rate": "1.2381e-04", + "loss": 0.7252, + "slid_loss": 0.6327, + "step": 3685, + "time": 14.03 + }, + { + "epoch": 3.54, + "learning_rate": "1.2378e-04", + "loss": 0.5925, + "slid_loss": 0.6316, + "step": 3686, + "time": 12.37 + }, + { + "epoch": 3.54, + "learning_rate": "1.2375e-04", + "loss": 0.6093, + "slid_loss": 0.6318, + "step": 3687, + "time": 13.7 + }, + { + "epoch": 3.54, + "learning_rate": "1.2372e-04", + "loss": 0.5485, + "slid_loss": 0.6308, + "step": 3688, + "time": 13.39 + }, + { + "epoch": 3.54, + "learning_rate": "1.2370e-04", + "loss": 0.5689, + "slid_loss": 0.631, + "step": 3689, + "time": 12.91 + }, + { + "epoch": 3.54, + "learning_rate": "1.2367e-04", + "loss": 0.5983, + "slid_loss": 0.6309, + "step": 3690, + "time": 13.04 + }, + { + "epoch": 3.55, + "learning_rate": "1.2364e-04", + "loss": 0.7227, + "slid_loss": 0.6319, + "step": 3691, + "time": 13.81 + }, + { + "epoch": 3.55, + "learning_rate": "1.2361e-04", + "loss": 0.6128, + "slid_loss": 0.6321, + "step": 3692, + "time": 11.35 + }, + { + "epoch": 3.55, + "learning_rate": "1.2358e-04", + "loss": 0.5602, + "slid_loss": 0.6315, + "step": 3693, + "time": 11.51 + }, + { + "epoch": 3.55, + 
"learning_rate": "1.2355e-04", + "loss": 0.6116, + "slid_loss": 0.6308, + "step": 3694, + "time": 11.93 + }, + { + "epoch": 3.55, + "learning_rate": "1.2352e-04", + "loss": 0.572, + "slid_loss": 0.6306, + "step": 3695, + "time": 12.26 + }, + { + "epoch": 3.55, + "learning_rate": "1.2350e-04", + "loss": 0.6107, + "slid_loss": 0.6306, + "step": 3696, + "time": 12.31 + }, + { + "epoch": 3.55, + "learning_rate": "1.2347e-04", + "loss": 0.5969, + "slid_loss": 0.6309, + "step": 3697, + "time": 14.59 + }, + { + "epoch": 3.55, + "learning_rate": "1.2344e-04", + "loss": 0.6376, + "slid_loss": 0.631, + "step": 3698, + "time": 14.58 + }, + { + "epoch": 3.55, + "learning_rate": "1.2341e-04", + "loss": 0.8, + "slid_loss": 0.6322, + "step": 3699, + "time": 13.64 + }, + { + "epoch": 3.55, + "learning_rate": "1.2338e-04", + "loss": 0.6438, + "slid_loss": 0.6318, + "step": 3700, + "time": 13.95 + }, + { + "epoch": 3.56, + "learning_rate": "1.2335e-04", + "loss": 0.5858, + "slid_loss": 0.6315, + "step": 3701, + "time": 13.24 + }, + { + "epoch": 3.56, + "learning_rate": "1.2333e-04", + "loss": 0.6567, + "slid_loss": 0.6323, + "step": 3702, + "time": 11.28 + }, + { + "epoch": 3.56, + "learning_rate": "1.2330e-04", + "loss": 0.5824, + "slid_loss": 0.6324, + "step": 3703, + "time": 14.04 + }, + { + "epoch": 3.56, + "learning_rate": "1.2327e-04", + "loss": 0.6941, + "slid_loss": 0.6324, + "step": 3704, + "time": 12.66 + }, + { + "epoch": 3.56, + "learning_rate": "1.2324e-04", + "loss": 0.6124, + "slid_loss": 0.6315, + "step": 3705, + "time": 13.41 + }, + { + "epoch": 3.56, + "learning_rate": "1.2321e-04", + "loss": 0.6248, + "slid_loss": 0.6311, + "step": 3706, + "time": 13.33 + }, + { + "epoch": 3.56, + "learning_rate": "1.2318e-04", + "loss": 0.5926, + "slid_loss": 0.6303, + "step": 3707, + "time": 13.66 + }, + { + "epoch": 3.56, + "learning_rate": "1.2316e-04", + "loss": 0.6705, + "slid_loss": 0.6302, + "step": 3708, + "time": 12.8 + }, + { + "epoch": 3.56, + "learning_rate": 
"1.2313e-04", + "loss": 0.6563, + "slid_loss": 0.63, + "step": 3709, + "time": 12.94 + }, + { + "epoch": 3.56, + "learning_rate": "1.2310e-04", + "loss": 0.7114, + "slid_loss": 0.6308, + "step": 3710, + "time": 13.95 + }, + { + "epoch": 3.56, + "learning_rate": "1.2307e-04", + "loss": 0.6356, + "slid_loss": 0.6301, + "step": 3711, + "time": 13.8 + }, + { + "epoch": 3.57, + "learning_rate": "1.2304e-04", + "loss": 0.642, + "slid_loss": 0.6303, + "step": 3712, + "time": 14.56 + }, + { + "epoch": 3.57, + "learning_rate": "1.2301e-04", + "loss": 0.6729, + "slid_loss": 0.6301, + "step": 3713, + "time": 13.29 + }, + { + "epoch": 3.57, + "learning_rate": "1.2299e-04", + "loss": 0.6034, + "slid_loss": 0.6305, + "step": 3714, + "time": 13.58 + }, + { + "epoch": 3.57, + "learning_rate": "1.2296e-04", + "loss": 0.667, + "slid_loss": 0.6306, + "step": 3715, + "time": 13.85 + }, + { + "epoch": 3.57, + "learning_rate": "1.2293e-04", + "loss": 0.6051, + "slid_loss": 0.63, + "step": 3716, + "time": 12.23 + }, + { + "epoch": 3.57, + "learning_rate": "1.2290e-04", + "loss": 0.6163, + "slid_loss": 0.6299, + "step": 3717, + "time": 13.67 + }, + { + "epoch": 3.57, + "learning_rate": "1.2287e-04", + "loss": 0.6735, + "slid_loss": 0.6311, + "step": 3718, + "time": 12.77 + }, + { + "epoch": 3.57, + "learning_rate": "1.2284e-04", + "loss": 0.5517, + "slid_loss": 0.6308, + "step": 3719, + "time": 11.82 + }, + { + "epoch": 3.57, + "learning_rate": "1.2282e-04", + "loss": 0.6306, + "slid_loss": 0.6304, + "step": 3720, + "time": 12.95 + }, + { + "epoch": 3.57, + "learning_rate": "1.2279e-04", + "loss": 0.6732, + "slid_loss": 0.6316, + "step": 3721, + "time": 12.92 + }, + { + "epoch": 3.58, + "learning_rate": "1.2276e-04", + "loss": 0.5174, + "slid_loss": 0.6295, + "step": 3722, + "time": 12.29 + }, + { + "epoch": 3.58, + "learning_rate": "1.2273e-04", + "loss": 0.6443, + "slid_loss": 0.6288, + "step": 3723, + "time": 12.19 + }, + { + "epoch": 3.58, + "learning_rate": "1.2270e-04", + "loss": 
0.6409, + "slid_loss": 0.629, + "step": 3724, + "time": 11.24 + }, + { + "epoch": 3.58, + "learning_rate": "1.2268e-04", + "loss": 0.656, + "slid_loss": 0.6289, + "step": 3725, + "time": 13.58 + }, + { + "epoch": 3.58, + "learning_rate": "1.2265e-04", + "loss": 0.5749, + "slid_loss": 0.6279, + "step": 3726, + "time": 12.98 + }, + { + "epoch": 3.58, + "learning_rate": "1.2262e-04", + "loss": 0.5907, + "slid_loss": 0.6271, + "step": 3727, + "time": 13.88 + }, + { + "epoch": 3.58, + "learning_rate": "1.2259e-04", + "loss": 0.6519, + "slid_loss": 0.6271, + "step": 3728, + "time": 13.26 + }, + { + "epoch": 3.58, + "learning_rate": "1.2256e-04", + "loss": 0.5387, + "slid_loss": 0.6256, + "step": 3729, + "time": 13.41 + }, + { + "epoch": 3.58, + "learning_rate": "1.2254e-04", + "loss": 0.566, + "slid_loss": 0.625, + "step": 3730, + "time": 11.62 + }, + { + "epoch": 3.58, + "learning_rate": "1.2251e-04", + "loss": 0.5706, + "slid_loss": 0.6246, + "step": 3731, + "time": 13.03 + }, + { + "epoch": 3.59, + "learning_rate": "1.2248e-04", + "loss": 0.612, + "slid_loss": 0.6233, + "step": 3732, + "time": 13.36 + }, + { + "epoch": 3.59, + "learning_rate": "1.2245e-04", + "loss": 0.6066, + "slid_loss": 0.6233, + "step": 3733, + "time": 15.02 + }, + { + "epoch": 3.59, + "learning_rate": "1.2242e-04", + "loss": 0.6042, + "slid_loss": 0.623, + "step": 3734, + "time": 13.84 + }, + { + "epoch": 3.59, + "learning_rate": "1.2240e-04", + "loss": 0.604, + "slid_loss": 0.6233, + "step": 3735, + "time": 11.83 + }, + { + "epoch": 3.59, + "learning_rate": "1.2237e-04", + "loss": 0.6495, + "slid_loss": 0.6228, + "step": 3736, + "time": 14.76 + }, + { + "epoch": 3.59, + "learning_rate": "1.2234e-04", + "loss": 0.6151, + "slid_loss": 0.6234, + "step": 3737, + "time": 11.74 + }, + { + "epoch": 3.59, + "learning_rate": "1.2231e-04", + "loss": 0.6755, + "slid_loss": 0.6235, + "step": 3738, + "time": 13.36 + }, + { + "epoch": 3.59, + "learning_rate": "1.2228e-04", + "loss": 0.5494, + "slid_loss": 
0.6228, + "step": 3739, + "time": 12.73 + }, + { + "epoch": 3.59, + "learning_rate": "1.2226e-04", + "loss": 0.6258, + "slid_loss": 0.623, + "step": 3740, + "time": 10.9 + }, + { + "epoch": 3.59, + "learning_rate": "1.2223e-04", + "loss": 0.6919, + "slid_loss": 0.6234, + "step": 3741, + "time": 13.11 + }, + { + "epoch": 3.59, + "learning_rate": "1.2220e-04", + "loss": 0.5469, + "slid_loss": 0.6231, + "step": 3742, + "time": 13.61 + }, + { + "epoch": 3.6, + "learning_rate": "1.2217e-04", + "loss": 0.6807, + "slid_loss": 0.6233, + "step": 3743, + "time": 13.5 + }, + { + "epoch": 3.6, + "learning_rate": "1.2214e-04", + "loss": 0.5979, + "slid_loss": 0.6224, + "step": 3744, + "time": 12.86 + }, + { + "epoch": 3.6, + "learning_rate": "1.2212e-04", + "loss": 0.609, + "slid_loss": 0.6219, + "step": 3745, + "time": 14.03 + }, + { + "epoch": 3.6, + "learning_rate": "1.2209e-04", + "loss": 0.6616, + "slid_loss": 0.6203, + "step": 3746, + "time": 12.91 + }, + { + "epoch": 3.6, + "learning_rate": "1.2206e-04", + "loss": 0.6405, + "slid_loss": 0.6209, + "step": 3747, + "time": 13.74 + }, + { + "epoch": 3.6, + "learning_rate": "1.2203e-04", + "loss": 0.6997, + "slid_loss": 0.6212, + "step": 3748, + "time": 13.54 + }, + { + "epoch": 3.6, + "learning_rate": "1.2201e-04", + "loss": 0.5952, + "slid_loss": 0.6197, + "step": 3749, + "time": 13.88 + }, + { + "epoch": 3.6, + "learning_rate": "1.2198e-04", + "loss": 0.6096, + "slid_loss": 0.6195, + "step": 3750, + "time": 12.93 + }, + { + "epoch": 3.6, + "learning_rate": "1.2195e-04", + "loss": 0.6262, + "slid_loss": 0.6192, + "step": 3751, + "time": 12.07 + }, + { + "epoch": 3.6, + "learning_rate": "1.2192e-04", + "loss": 0.7387, + "slid_loss": 0.6202, + "step": 3752, + "time": 12.9 + }, + { + "epoch": 3.61, + "learning_rate": "1.2189e-04", + "loss": 0.6803, + "slid_loss": 0.6212, + "step": 3753, + "time": 11.68 + }, + { + "epoch": 3.61, + "learning_rate": "1.2187e-04", + "loss": 0.585, + "slid_loss": 0.6206, + "step": 3754, + "time": 
11.82 + }, + { + "epoch": 3.61, + "learning_rate": "1.2184e-04", + "loss": 0.7339, + "slid_loss": 0.6209, + "step": 3755, + "time": 12.84 + }, + { + "epoch": 3.61, + "learning_rate": "1.2181e-04", + "loss": 0.5688, + "slid_loss": 0.6201, + "step": 3756, + "time": 11.95 + }, + { + "epoch": 3.61, + "learning_rate": "1.2178e-04", + "loss": 0.664, + "slid_loss": 0.6201, + "step": 3757, + "time": 13.49 + }, + { + "epoch": 3.61, + "learning_rate": "1.2176e-04", + "loss": 0.5742, + "slid_loss": 0.62, + "step": 3758, + "time": 14.4 + }, + { + "epoch": 3.61, + "learning_rate": "1.2173e-04", + "loss": 0.6069, + "slid_loss": 0.6197, + "step": 3759, + "time": 13.93 + }, + { + "epoch": 3.61, + "learning_rate": "1.2170e-04", + "loss": 0.6834, + "slid_loss": 0.6206, + "step": 3760, + "time": 13.6 + }, + { + "epoch": 3.61, + "learning_rate": "1.2167e-04", + "loss": 0.6422, + "slid_loss": 0.621, + "step": 3761, + "time": 11.37 + }, + { + "epoch": 3.61, + "learning_rate": "1.2165e-04", + "loss": 0.5186, + "slid_loss": 0.6197, + "step": 3762, + "time": 14.09 + }, + { + "epoch": 3.61, + "learning_rate": "1.2162e-04", + "loss": 0.6129, + "slid_loss": 0.6194, + "step": 3763, + "time": 10.97 + }, + { + "epoch": 3.62, + "learning_rate": "1.2159e-04", + "loss": 0.6105, + "slid_loss": 0.6194, + "step": 3764, + "time": 10.95 + }, + { + "epoch": 3.62, + "learning_rate": "1.2156e-04", + "loss": 0.5842, + "slid_loss": 0.62, + "step": 3765, + "time": 14.08 + }, + { + "epoch": 3.62, + "learning_rate": "1.2154e-04", + "loss": 0.5901, + "slid_loss": 0.6197, + "step": 3766, + "time": 13.8 + }, + { + "epoch": 3.62, + "learning_rate": "1.2151e-04", + "loss": 0.6424, + "slid_loss": 0.6204, + "step": 3767, + "time": 12.87 + }, + { + "epoch": 3.62, + "learning_rate": "1.2148e-04", + "loss": 0.6245, + "slid_loss": 0.6211, + "step": 3768, + "time": 12.9 + }, + { + "epoch": 3.62, + "learning_rate": "1.2145e-04", + "loss": 0.5785, + "slid_loss": 0.6204, + "step": 3769, + "time": 13.29 + }, + { + "epoch": 
3.62, + "learning_rate": "1.2143e-04", + "loss": 0.5182, + "slid_loss": 0.6196, + "step": 3770, + "time": 12.97 + }, + { + "epoch": 3.62, + "learning_rate": "1.2140e-04", + "loss": 0.5652, + "slid_loss": 0.6188, + "step": 3771, + "time": 13.54 + }, + { + "epoch": 3.62, + "learning_rate": "1.2137e-04", + "loss": 0.5517, + "slid_loss": 0.6176, + "step": 3772, + "time": 13.37 + }, + { + "epoch": 3.62, + "learning_rate": "1.2134e-04", + "loss": 0.5413, + "slid_loss": 0.6175, + "step": 3773, + "time": 13.5 + }, + { + "epoch": 3.63, + "learning_rate": "1.2132e-04", + "loss": 0.6788, + "slid_loss": 0.6191, + "step": 3774, + "time": 11.17 + }, + { + "epoch": 3.63, + "learning_rate": "1.2129e-04", + "loss": 0.6935, + "slid_loss": 0.62, + "step": 3775, + "time": 13.88 + }, + { + "epoch": 3.63, + "learning_rate": "1.2126e-04", + "loss": 0.5992, + "slid_loss": 0.6197, + "step": 3776, + "time": 13.57 + }, + { + "epoch": 3.63, + "learning_rate": "1.2123e-04", + "loss": 0.645, + "slid_loss": 0.6207, + "step": 3777, + "time": 14.08 + }, + { + "epoch": 3.63, + "learning_rate": "1.2121e-04", + "loss": 0.6072, + "slid_loss": 0.6204, + "step": 3778, + "time": 13.7 + }, + { + "epoch": 3.63, + "learning_rate": "1.2118e-04", + "loss": 0.6472, + "slid_loss": 0.6218, + "step": 3779, + "time": 13.99 + }, + { + "epoch": 3.63, + "learning_rate": "1.2115e-04", + "loss": 0.5, + "slid_loss": 0.6206, + "step": 3780, + "time": 13.28 + }, + { + "epoch": 3.63, + "learning_rate": "1.2112e-04", + "loss": 0.5584, + "slid_loss": 0.62, + "step": 3781, + "time": 13.98 + }, + { + "epoch": 3.63, + "learning_rate": "1.2110e-04", + "loss": 0.6658, + "slid_loss": 0.6212, + "step": 3782, + "time": 13.31 + }, + { + "epoch": 3.63, + "learning_rate": "1.2107e-04", + "loss": 0.6418, + "slid_loss": 0.6212, + "step": 3783, + "time": 12.93 + }, + { + "epoch": 3.63, + "learning_rate": "1.2104e-04", + "loss": 0.675, + "slid_loss": 0.6215, + "step": 3784, + "time": 11.53 + }, + { + "epoch": 3.64, + "learning_rate": 
"1.2101e-04", + "loss": 0.6514, + "slid_loss": 0.6208, + "step": 3785, + "time": 13.82 + }, + { + "epoch": 3.64, + "learning_rate": "1.2099e-04", + "loss": 0.6823, + "slid_loss": 0.6217, + "step": 3786, + "time": 12.39 + }, + { + "epoch": 3.64, + "learning_rate": "1.2096e-04", + "loss": 0.7275, + "slid_loss": 0.6228, + "step": 3787, + "time": 11.63 + }, + { + "epoch": 3.64, + "learning_rate": "1.2093e-04", + "loss": 0.6916, + "slid_loss": 0.6243, + "step": 3788, + "time": 12.85 + }, + { + "epoch": 3.64, + "learning_rate": "1.2090e-04", + "loss": 0.6152, + "slid_loss": 0.6247, + "step": 3789, + "time": 13.88 + }, + { + "epoch": 3.64, + "learning_rate": "1.2088e-04", + "loss": 0.4926, + "slid_loss": 0.6237, + "step": 3790, + "time": 12.67 + }, + { + "epoch": 3.64, + "learning_rate": "1.2085e-04", + "loss": 0.6383, + "slid_loss": 0.6228, + "step": 3791, + "time": 13.42 + }, + { + "epoch": 3.64, + "learning_rate": "1.2082e-04", + "loss": 0.632, + "slid_loss": 0.623, + "step": 3792, + "time": 11.9 + }, + { + "epoch": 3.64, + "learning_rate": "1.2080e-04", + "loss": 0.6198, + "slid_loss": 0.6236, + "step": 3793, + "time": 13.03 + }, + { + "epoch": 3.64, + "learning_rate": "1.2077e-04", + "loss": 0.6196, + "slid_loss": 0.6237, + "step": 3794, + "time": 13.44 + }, + { + "epoch": 3.65, + "learning_rate": "1.2074e-04", + "loss": 0.5489, + "slid_loss": 0.6235, + "step": 3795, + "time": 13.49 + }, + { + "epoch": 3.65, + "learning_rate": "1.2071e-04", + "loss": 0.7149, + "slid_loss": 0.6245, + "step": 3796, + "time": 14.6 + }, + { + "epoch": 3.65, + "learning_rate": "1.2069e-04", + "loss": 0.626, + "slid_loss": 0.6248, + "step": 3797, + "time": 13.34 + }, + { + "epoch": 3.65, + "learning_rate": "1.2066e-04", + "loss": 0.6512, + "slid_loss": 0.6249, + "step": 3798, + "time": 13.53 + }, + { + "epoch": 3.65, + "learning_rate": "1.2063e-04", + "loss": 0.5921, + "slid_loss": 0.6229, + "step": 3799, + "time": 13.19 + }, + { + "epoch": 3.65, + "learning_rate": "1.2061e-04", + "loss": 
0.7204, + "slid_loss": 0.6236, + "step": 3800, + "time": 13.33 + }, + { + "epoch": 3.65, + "learning_rate": "1.2058e-04", + "loss": 0.6504, + "slid_loss": 0.6243, + "step": 3801, + "time": 12.73 + }, + { + "epoch": 3.65, + "learning_rate": "1.2055e-04", + "loss": 0.6032, + "slid_loss": 0.6237, + "step": 3802, + "time": 13.5 + }, + { + "epoch": 3.65, + "learning_rate": "1.2052e-04", + "loss": 0.551, + "slid_loss": 0.6234, + "step": 3803, + "time": 13.43 + }, + { + "epoch": 3.65, + "learning_rate": "1.2050e-04", + "loss": 0.5887, + "slid_loss": 0.6224, + "step": 3804, + "time": 13.85 + }, + { + "epoch": 3.66, + "learning_rate": "1.2047e-04", + "loss": 0.6136, + "slid_loss": 0.6224, + "step": 3805, + "time": 13.32 + }, + { + "epoch": 3.66, + "learning_rate": "1.2044e-04", + "loss": 0.7176, + "slid_loss": 0.6233, + "step": 3806, + "time": 12.9 + }, + { + "epoch": 3.66, + "learning_rate": "1.2042e-04", + "loss": 0.6658, + "slid_loss": 0.624, + "step": 3807, + "time": 14.51 + }, + { + "epoch": 3.66, + "learning_rate": "1.2039e-04", + "loss": 0.5836, + "slid_loss": 0.6232, + "step": 3808, + "time": 12.71 + }, + { + "epoch": 3.66, + "learning_rate": "1.2036e-04", + "loss": 0.6107, + "slid_loss": 0.6227, + "step": 3809, + "time": 10.6 + }, + { + "epoch": 3.66, + "learning_rate": "1.2033e-04", + "loss": 0.6332, + "slid_loss": 0.6219, + "step": 3810, + "time": 13.2 + }, + { + "epoch": 3.66, + "learning_rate": "1.2031e-04", + "loss": 0.5605, + "slid_loss": 0.6212, + "step": 3811, + "time": 12.94 + }, + { + "epoch": 3.66, + "learning_rate": "1.2028e-04", + "loss": 0.5722, + "slid_loss": 0.6205, + "step": 3812, + "time": 14.27 + }, + { + "epoch": 3.66, + "learning_rate": "1.2025e-04", + "loss": 0.5751, + "slid_loss": 0.6195, + "step": 3813, + "time": 13.57 + }, + { + "epoch": 3.66, + "learning_rate": "1.2023e-04", + "loss": 0.5974, + "slid_loss": 0.6194, + "step": 3814, + "time": 13.07 + }, + { + "epoch": 3.66, + "learning_rate": "1.2020e-04", + "loss": 0.6686, + "slid_loss": 
0.6195, + "step": 3815, + "time": 13.2 + }, + { + "epoch": 3.67, + "learning_rate": "1.2017e-04", + "loss": 0.5794, + "slid_loss": 0.6192, + "step": 3816, + "time": 13.39 + }, + { + "epoch": 3.67, + "learning_rate": "1.2015e-04", + "loss": 0.6551, + "slid_loss": 0.6196, + "step": 3817, + "time": 13.16 + }, + { + "epoch": 3.67, + "learning_rate": "1.2012e-04", + "loss": 0.6355, + "slid_loss": 0.6192, + "step": 3818, + "time": 13.78 + }, + { + "epoch": 3.67, + "learning_rate": "1.2009e-04", + "loss": 0.6196, + "slid_loss": 0.6199, + "step": 3819, + "time": 11.31 + }, + { + "epoch": 3.67, + "learning_rate": "1.2007e-04", + "loss": 0.5881, + "slid_loss": 0.6195, + "step": 3820, + "time": 12.24 + }, + { + "epoch": 3.67, + "learning_rate": "1.2004e-04", + "loss": 0.6033, + "slid_loss": 0.6188, + "step": 3821, + "time": 13.01 + }, + { + "epoch": 3.67, + "learning_rate": "1.2001e-04", + "loss": 0.567, + "slid_loss": 0.6193, + "step": 3822, + "time": 13.61 + }, + { + "epoch": 3.67, + "learning_rate": "1.1998e-04", + "loss": 0.5563, + "slid_loss": 0.6184, + "step": 3823, + "time": 13.35 + }, + { + "epoch": 3.67, + "learning_rate": "1.1996e-04", + "loss": 0.5843, + "slid_loss": 0.6178, + "step": 3824, + "time": 13.41 + }, + { + "epoch": 3.67, + "learning_rate": "1.1993e-04", + "loss": 0.654, + "slid_loss": 0.6178, + "step": 3825, + "time": 13.14 + }, + { + "epoch": 3.68, + "learning_rate": "1.1990e-04", + "loss": 0.6181, + "slid_loss": 0.6182, + "step": 3826, + "time": 11.42 + }, + { + "epoch": 3.68, + "learning_rate": "1.1988e-04", + "loss": 0.6686, + "slid_loss": 0.619, + "step": 3827, + "time": 12.91 + }, + { + "epoch": 3.68, + "learning_rate": "1.1985e-04", + "loss": 0.5144, + "slid_loss": 0.6176, + "step": 3828, + "time": 13.81 + }, + { + "epoch": 3.68, + "learning_rate": "1.1982e-04", + "loss": 0.5355, + "slid_loss": 0.6176, + "step": 3829, + "time": 14.09 + }, + { + "epoch": 3.68, + "learning_rate": "1.1980e-04", + "loss": 0.5977, + "slid_loss": 0.6179, + "step": 3830, 
+ "time": 13.34 + }, + { + "epoch": 3.68, + "learning_rate": "1.1977e-04", + "loss": 0.6566, + "slid_loss": 0.6188, + "step": 3831, + "time": 13.28 + }, + { + "epoch": 3.68, + "learning_rate": "1.1974e-04", + "loss": 0.5541, + "slid_loss": 0.6182, + "step": 3832, + "time": 13.25 + }, + { + "epoch": 3.68, + "learning_rate": "1.1972e-04", + "loss": 0.6048, + "slid_loss": 0.6182, + "step": 3833, + "time": 13.36 + }, + { + "epoch": 3.68, + "learning_rate": "1.1969e-04", + "loss": 0.6216, + "slid_loss": 0.6184, + "step": 3834, + "time": 12.81 + }, + { + "epoch": 3.68, + "learning_rate": "1.1966e-04", + "loss": 0.6296, + "slid_loss": 0.6186, + "step": 3835, + "time": 12.91 + }, + { + "epoch": 3.68, + "learning_rate": "1.1964e-04", + "loss": 0.6122, + "slid_loss": 0.6182, + "step": 3836, + "time": 12.87 + }, + { + "epoch": 3.69, + "learning_rate": "1.1961e-04", + "loss": 0.5777, + "slid_loss": 0.6179, + "step": 3837, + "time": 13.74 + }, + { + "epoch": 3.69, + "learning_rate": "1.1958e-04", + "loss": 0.5797, + "slid_loss": 0.6169, + "step": 3838, + "time": 13.65 + }, + { + "epoch": 3.69, + "learning_rate": "1.1956e-04", + "loss": 0.6241, + "slid_loss": 0.6177, + "step": 3839, + "time": 13.42 + }, + { + "epoch": 3.69, + "learning_rate": "1.1953e-04", + "loss": 0.6271, + "slid_loss": 0.6177, + "step": 3840, + "time": 12.35 + }, + { + "epoch": 3.69, + "learning_rate": "1.1950e-04", + "loss": 0.6186, + "slid_loss": 0.6169, + "step": 3841, + "time": 13.46 + }, + { + "epoch": 3.69, + "learning_rate": "1.1948e-04", + "loss": 0.6126, + "slid_loss": 0.6176, + "step": 3842, + "time": 12.69 + }, + { + "epoch": 3.69, + "learning_rate": "1.1945e-04", + "loss": 0.6026, + "slid_loss": 0.6168, + "step": 3843, + "time": 12.92 + }, + { + "epoch": 3.69, + "learning_rate": "1.1942e-04", + "loss": 0.5642, + "slid_loss": 0.6165, + "step": 3844, + "time": 13.88 + }, + { + "epoch": 3.69, + "learning_rate": "1.1940e-04", + "loss": 0.5821, + "slid_loss": 0.6162, + "step": 3845, + "time": 12.43 + 
}, + { + "epoch": 3.69, + "learning_rate": "1.1937e-04", + "loss": 0.5647, + "slid_loss": 0.6152, + "step": 3846, + "time": 13.91 + }, + { + "epoch": 3.7, + "learning_rate": "1.1935e-04", + "loss": 0.5998, + "slid_loss": 0.6148, + "step": 3847, + "time": 11.46 + }, + { + "epoch": 3.7, + "learning_rate": "1.1932e-04", + "loss": 0.557, + "slid_loss": 0.6134, + "step": 3848, + "time": 13.48 + }, + { + "epoch": 3.7, + "learning_rate": "1.1929e-04", + "loss": 0.538, + "slid_loss": 0.6128, + "step": 3849, + "time": 11.36 + }, + { + "epoch": 3.7, + "learning_rate": "1.1927e-04", + "loss": 0.5282, + "slid_loss": 0.612, + "step": 3850, + "time": 12.6 + }, + { + "epoch": 3.7, + "learning_rate": "1.1924e-04", + "loss": 0.6106, + "slid_loss": 0.6119, + "step": 3851, + "time": 12.93 + }, + { + "epoch": 3.7, + "learning_rate": "1.1921e-04", + "loss": 0.6449, + "slid_loss": 0.6109, + "step": 3852, + "time": 11.75 + }, + { + "epoch": 3.7, + "learning_rate": "1.1919e-04", + "loss": 0.5999, + "slid_loss": 0.6101, + "step": 3853, + "time": 12.91 + }, + { + "epoch": 3.7, + "learning_rate": "1.1916e-04", + "loss": 0.601, + "slid_loss": 0.6103, + "step": 3854, + "time": 13.58 + }, + { + "epoch": 3.7, + "learning_rate": "1.1913e-04", + "loss": 0.689, + "slid_loss": 0.6098, + "step": 3855, + "time": 13.68 + }, + { + "epoch": 3.7, + "learning_rate": "1.1911e-04", + "loss": 0.5775, + "slid_loss": 0.6099, + "step": 3856, + "time": 12.82 + }, + { + "epoch": 3.71, + "learning_rate": "1.1908e-04", + "loss": 0.6907, + "slid_loss": 0.6102, + "step": 3857, + "time": 13.47 + }, + { + "epoch": 3.71, + "learning_rate": "1.1905e-04", + "loss": 0.7058, + "slid_loss": 0.6115, + "step": 3858, + "time": 12.41 + }, + { + "epoch": 3.71, + "learning_rate": "1.1903e-04", + "loss": 0.6423, + "slid_loss": 0.6118, + "step": 3859, + "time": 13.62 + }, + { + "epoch": 3.71, + "learning_rate": "1.1900e-04", + "loss": 0.6328, + "slid_loss": 0.6113, + "step": 3860, + "time": 11.6 + }, + { + "epoch": 3.71, + 
"learning_rate": "1.1898e-04", + "loss": 0.6567, + "slid_loss": 0.6115, + "step": 3861, + "time": 13.83 + }, + { + "epoch": 3.71, + "learning_rate": "1.1895e-04", + "loss": 0.656, + "slid_loss": 0.6129, + "step": 3862, + "time": 12.59 + }, + { + "epoch": 3.71, + "learning_rate": "1.1892e-04", + "loss": 0.7047, + "slid_loss": 0.6138, + "step": 3863, + "time": 13.99 + }, + { + "epoch": 3.71, + "learning_rate": "1.1890e-04", + "loss": 0.6702, + "slid_loss": 0.6144, + "step": 3864, + "time": 12.62 + }, + { + "epoch": 3.71, + "learning_rate": "1.1887e-04", + "loss": 0.5953, + "slid_loss": 0.6145, + "step": 3865, + "time": 13.82 + }, + { + "epoch": 3.71, + "learning_rate": "1.1884e-04", + "loss": 0.6306, + "slid_loss": 0.6149, + "step": 3866, + "time": 13.22 + }, + { + "epoch": 3.71, + "learning_rate": "1.1882e-04", + "loss": 0.6205, + "slid_loss": 0.6147, + "step": 3867, + "time": 14.19 + }, + { + "epoch": 3.72, + "learning_rate": "1.1879e-04", + "loss": 0.6103, + "slid_loss": 0.6145, + "step": 3868, + "time": 13.46 + }, + { + "epoch": 3.72, + "learning_rate": "1.1877e-04", + "loss": 0.6188, + "slid_loss": 0.6149, + "step": 3869, + "time": 13.16 + }, + { + "epoch": 3.72, + "learning_rate": "1.1874e-04", + "loss": 0.6661, + "slid_loss": 0.6164, + "step": 3870, + "time": 12.84 + }, + { + "epoch": 3.72, + "learning_rate": "1.1871e-04", + "loss": 0.6449, + "slid_loss": 0.6172, + "step": 3871, + "time": 13.93 + }, + { + "epoch": 3.72, + "learning_rate": "1.1869e-04", + "loss": 0.5969, + "slid_loss": 0.6177, + "step": 3872, + "time": 13.23 + }, + { + "epoch": 3.72, + "learning_rate": "1.1866e-04", + "loss": 0.608, + "slid_loss": 0.6183, + "step": 3873, + "time": 13.75 + }, + { + "epoch": 3.72, + "learning_rate": "1.1863e-04", + "loss": 0.6065, + "slid_loss": 0.6176, + "step": 3874, + "time": 14.18 + }, + { + "epoch": 3.72, + "learning_rate": "1.1861e-04", + "loss": 0.511, + "slid_loss": 0.6158, + "step": 3875, + "time": 13.18 + }, + { + "epoch": 3.72, + "learning_rate": 
"1.1858e-04", + "loss": 0.6424, + "slid_loss": 0.6162, + "step": 3876, + "time": 13.01 + }, + { + "epoch": 3.72, + "learning_rate": "1.1856e-04", + "loss": 0.6356, + "slid_loss": 0.6161, + "step": 3877, + "time": 13.11 + }, + { + "epoch": 3.73, + "learning_rate": "1.1853e-04", + "loss": 0.6643, + "slid_loss": 0.6167, + "step": 3878, + "time": 13.26 + }, + { + "epoch": 3.73, + "learning_rate": "1.1850e-04", + "loss": 0.6946, + "slid_loss": 0.6172, + "step": 3879, + "time": 12.43 + }, + { + "epoch": 3.73, + "learning_rate": "1.1848e-04", + "loss": 0.606, + "slid_loss": 0.6182, + "step": 3880, + "time": 10.94 + }, + { + "epoch": 3.73, + "learning_rate": "1.1845e-04", + "loss": 0.5943, + "slid_loss": 0.6186, + "step": 3881, + "time": 12.75 + }, + { + "epoch": 3.73, + "learning_rate": "1.1843e-04", + "loss": 0.6546, + "slid_loss": 0.6185, + "step": 3882, + "time": 13.31 + }, + { + "epoch": 3.73, + "learning_rate": "1.1840e-04", + "loss": 0.6581, + "slid_loss": 0.6186, + "step": 3883, + "time": 13.33 + }, + { + "epoch": 3.73, + "learning_rate": "1.1837e-04", + "loss": 0.5913, + "slid_loss": 0.6178, + "step": 3884, + "time": 13.98 + }, + { + "epoch": 3.73, + "learning_rate": "1.1835e-04", + "loss": 0.6954, + "slid_loss": 0.6182, + "step": 3885, + "time": 12.03 + }, + { + "epoch": 3.73, + "learning_rate": "1.1832e-04", + "loss": 0.6381, + "slid_loss": 0.6178, + "step": 3886, + "time": 13.3 + }, + { + "epoch": 3.73, + "learning_rate": "1.1830e-04", + "loss": 0.6418, + "slid_loss": 0.6169, + "step": 3887, + "time": 13.68 + }, + { + "epoch": 3.73, + "learning_rate": "1.1827e-04", + "loss": 0.6672, + "slid_loss": 0.6167, + "step": 3888, + "time": 13.21 + }, + { + "epoch": 3.74, + "learning_rate": "1.1824e-04", + "loss": 0.7444, + "slid_loss": 0.618, + "step": 3889, + "time": 11.97 + }, + { + "epoch": 3.74, + "learning_rate": "1.1822e-04", + "loss": 0.5814, + "slid_loss": 0.6189, + "step": 3890, + "time": 13.69 + }, + { + "epoch": 3.74, + "learning_rate": "1.1819e-04", + 
"loss": 0.6276, + "slid_loss": 0.6188, + "step": 3891, + "time": 12.82 + }, + { + "epoch": 3.74, + "learning_rate": "1.1817e-04", + "loss": 0.69, + "slid_loss": 0.6193, + "step": 3892, + "time": 14.09 + }, + { + "epoch": 3.74, + "learning_rate": "1.1814e-04", + "loss": 0.6502, + "slid_loss": 0.6197, + "step": 3893, + "time": 12.93 + }, + { + "epoch": 3.74, + "learning_rate": "1.1812e-04", + "loss": 0.5799, + "slid_loss": 0.6193, + "step": 3894, + "time": 13.22 + }, + { + "epoch": 3.74, + "learning_rate": "1.1809e-04", + "loss": 0.6028, + "slid_loss": 0.6198, + "step": 3895, + "time": 13.53 + }, + { + "epoch": 3.74, + "learning_rate": "1.1806e-04", + "loss": 0.5532, + "slid_loss": 0.6182, + "step": 3896, + "time": 12.86 + }, + { + "epoch": 3.74, + "learning_rate": "1.1804e-04", + "loss": 0.6812, + "slid_loss": 0.6187, + "step": 3897, + "time": 13.4 + }, + { + "epoch": 3.74, + "learning_rate": "1.1801e-04", + "loss": 0.5525, + "slid_loss": 0.6177, + "step": 3898, + "time": 12.98 + }, + { + "epoch": 3.75, + "learning_rate": "1.1799e-04", + "loss": 0.623, + "slid_loss": 0.618, + "step": 3899, + "time": 12.82 + }, + { + "epoch": 3.75, + "learning_rate": "1.1796e-04", + "loss": 0.6304, + "slid_loss": 0.6171, + "step": 3900, + "time": 13.39 + }, + { + "epoch": 3.75, + "learning_rate": "1.1793e-04", + "loss": 0.6627, + "slid_loss": 0.6173, + "step": 3901, + "time": 12.05 + }, + { + "epoch": 3.75, + "learning_rate": "1.1791e-04", + "loss": 0.6577, + "slid_loss": 0.6178, + "step": 3902, + "time": 14.26 + }, + { + "epoch": 3.75, + "learning_rate": "1.1788e-04", + "loss": 0.6943, + "slid_loss": 0.6192, + "step": 3903, + "time": 11.0 + }, + { + "epoch": 3.75, + "learning_rate": "1.1786e-04", + "loss": 0.6034, + "slid_loss": 0.6194, + "step": 3904, + "time": 13.85 + }, + { + "epoch": 3.75, + "learning_rate": "1.1783e-04", + "loss": 0.6199, + "slid_loss": 0.6195, + "step": 3905, + "time": 13.59 + }, + { + "epoch": 3.75, + "learning_rate": "1.1781e-04", + "loss": 0.5759, + 
"slid_loss": 0.618, + "step": 3906, + "time": 12.97 + }, + { + "epoch": 3.75, + "learning_rate": "1.1778e-04", + "loss": 0.5461, + "slid_loss": 0.6168, + "step": 3907, + "time": 12.66 + }, + { + "epoch": 3.75, + "learning_rate": "1.1776e-04", + "loss": 0.6418, + "slid_loss": 0.6174, + "step": 3908, + "time": 13.64 + }, + { + "epoch": 3.76, + "learning_rate": "1.1773e-04", + "loss": 0.5691, + "slid_loss": 0.617, + "step": 3909, + "time": 13.33 + }, + { + "epoch": 3.76, + "learning_rate": "1.1770e-04", + "loss": 0.6176, + "slid_loss": 0.6169, + "step": 3910, + "time": 13.49 + }, + { + "epoch": 3.76, + "learning_rate": "1.1768e-04", + "loss": 0.6121, + "slid_loss": 0.6174, + "step": 3911, + "time": 12.96 + }, + { + "epoch": 3.76, + "learning_rate": "1.1765e-04", + "loss": 0.6318, + "slid_loss": 0.618, + "step": 3912, + "time": 14.11 + }, + { + "epoch": 3.76, + "learning_rate": "1.1763e-04", + "loss": 0.6085, + "slid_loss": 0.6183, + "step": 3913, + "time": 12.4 + }, + { + "epoch": 3.76, + "learning_rate": "1.1760e-04", + "loss": 0.5625, + "slid_loss": 0.618, + "step": 3914, + "time": 13.28 + }, + { + "epoch": 3.76, + "learning_rate": "1.1758e-04", + "loss": 0.5731, + "slid_loss": 0.617, + "step": 3915, + "time": 13.51 + }, + { + "epoch": 3.76, + "learning_rate": "1.1755e-04", + "loss": 0.5926, + "slid_loss": 0.6171, + "step": 3916, + "time": 14.1 + }, + { + "epoch": 3.76, + "learning_rate": "1.1753e-04", + "loss": 0.6802, + "slid_loss": 0.6174, + "step": 3917, + "time": 13.8 + }, + { + "epoch": 3.76, + "learning_rate": "1.1750e-04", + "loss": 0.5793, + "slid_loss": 0.6168, + "step": 3918, + "time": 11.23 + }, + { + "epoch": 3.76, + "learning_rate": "1.1747e-04", + "loss": 0.6132, + "slid_loss": 0.6168, + "step": 3919, + "time": 14.39 + }, + { + "epoch": 3.77, + "learning_rate": "1.1745e-04", + "loss": 0.6688, + "slid_loss": 0.6176, + "step": 3920, + "time": 12.11 + }, + { + "epoch": 3.77, + "learning_rate": "1.1742e-04", + "loss": 0.6304, + "slid_loss": 0.6178, + 
"step": 3921, + "time": 14.28 + }, + { + "epoch": 3.77, + "learning_rate": "1.1740e-04", + "loss": 0.6145, + "slid_loss": 0.6183, + "step": 3922, + "time": 13.32 + }, + { + "epoch": 3.77, + "learning_rate": "1.1737e-04", + "loss": 0.5607, + "slid_loss": 0.6184, + "step": 3923, + "time": 13.37 + }, + { + "epoch": 3.77, + "learning_rate": "1.1735e-04", + "loss": 0.6, + "slid_loss": 0.6185, + "step": 3924, + "time": 13.92 + }, + { + "epoch": 3.77, + "learning_rate": "1.1732e-04", + "loss": 0.6452, + "slid_loss": 0.6184, + "step": 3925, + "time": 12.34 + }, + { + "epoch": 3.77, + "learning_rate": "1.1730e-04", + "loss": 0.577, + "slid_loss": 0.618, + "step": 3926, + "time": 11.86 + }, + { + "epoch": 3.77, + "learning_rate": "1.1727e-04", + "loss": 0.6416, + "slid_loss": 0.6177, + "step": 3927, + "time": 11.12 + }, + { + "epoch": 3.77, + "learning_rate": "1.1725e-04", + "loss": 0.6312, + "slid_loss": 0.6189, + "step": 3928, + "time": 13.29 + }, + { + "epoch": 3.77, + "learning_rate": "1.1722e-04", + "loss": 0.584, + "slid_loss": 0.6194, + "step": 3929, + "time": 13.23 + }, + { + "epoch": 3.78, + "learning_rate": "1.1719e-04", + "loss": 0.4918, + "slid_loss": 0.6183, + "step": 3930, + "time": 13.21 + }, + { + "epoch": 3.78, + "learning_rate": "1.1717e-04", + "loss": 0.6898, + "slid_loss": 0.6187, + "step": 3931, + "time": 12.43 + }, + { + "epoch": 3.78, + "learning_rate": "1.1714e-04", + "loss": 0.6622, + "slid_loss": 0.6197, + "step": 3932, + "time": 12.29 + }, + { + "epoch": 3.78, + "learning_rate": "1.1712e-04", + "loss": 0.6442, + "slid_loss": 0.6201, + "step": 3933, + "time": 13.66 + }, + { + "epoch": 3.78, + "learning_rate": "1.1709e-04", + "loss": 0.5799, + "slid_loss": 0.6197, + "step": 3934, + "time": 14.19 + }, + { + "epoch": 3.78, + "learning_rate": "1.1707e-04", + "loss": 0.565, + "slid_loss": 0.6191, + "step": 3935, + "time": 13.75 + }, + { + "epoch": 3.78, + "learning_rate": "1.1704e-04", + "loss": 0.6401, + "slid_loss": 0.6194, + "step": 3936, + "time": 
14.02 + }, + { + "epoch": 3.78, + "learning_rate": "1.1702e-04", + "loss": 0.67, + "slid_loss": 0.6203, + "step": 3937, + "time": 13.87 + }, + { + "epoch": 3.78, + "learning_rate": "1.1699e-04", + "loss": 0.6133, + "slid_loss": 0.6206, + "step": 3938, + "time": 14.06 + }, + { + "epoch": 3.78, + "learning_rate": "1.1697e-04", + "loss": 0.5876, + "slid_loss": 0.6203, + "step": 3939, + "time": 14.0 + }, + { + "epoch": 3.78, + "learning_rate": "1.1694e-04", + "loss": 0.5145, + "slid_loss": 0.6191, + "step": 3940, + "time": 13.32 + }, + { + "epoch": 3.79, + "learning_rate": "1.1692e-04", + "loss": 0.5337, + "slid_loss": 0.6183, + "step": 3941, + "time": 13.12 + }, + { + "epoch": 3.79, + "learning_rate": "1.1689e-04", + "loss": 0.5913, + "slid_loss": 0.6181, + "step": 3942, + "time": 13.71 + }, + { + "epoch": 3.79, + "learning_rate": "1.1687e-04", + "loss": 0.547, + "slid_loss": 0.6175, + "step": 3943, + "time": 13.22 + }, + { + "epoch": 3.79, + "learning_rate": "1.1684e-04", + "loss": 0.6322, + "slid_loss": 0.6182, + "step": 3944, + "time": 13.11 + }, + { + "epoch": 3.79, + "learning_rate": "1.1682e-04", + "loss": 0.5544, + "slid_loss": 0.6179, + "step": 3945, + "time": 13.65 + }, + { + "epoch": 3.79, + "learning_rate": "1.1679e-04", + "loss": 0.5796, + "slid_loss": 0.6181, + "step": 3946, + "time": 12.79 + }, + { + "epoch": 3.79, + "learning_rate": "1.1677e-04", + "loss": 0.6877, + "slid_loss": 0.6189, + "step": 3947, + "time": 13.68 + }, + { + "epoch": 3.79, + "learning_rate": "1.1674e-04", + "loss": 0.5141, + "slid_loss": 0.6185, + "step": 3948, + "time": 12.77 + }, + { + "epoch": 3.79, + "learning_rate": "1.1672e-04", + "loss": 0.5914, + "slid_loss": 0.619, + "step": 3949, + "time": 12.28 + }, + { + "epoch": 3.79, + "learning_rate": "1.1669e-04", + "loss": 0.5865, + "slid_loss": 0.6196, + "step": 3950, + "time": 13.48 + }, + { + "epoch": 3.8, + "learning_rate": "1.1667e-04", + "loss": 0.5657, + "slid_loss": 0.6192, + "step": 3951, + "time": 13.54 + }, + { + "epoch": 
3.8, + "learning_rate": "1.1664e-04", + "loss": 0.5483, + "slid_loss": 0.6182, + "step": 3952, + "time": 13.23 + }, + { + "epoch": 3.8, + "learning_rate": "1.1662e-04", + "loss": 0.5609, + "slid_loss": 0.6178, + "step": 3953, + "time": 13.32 + }, + { + "epoch": 3.8, + "learning_rate": "1.1659e-04", + "loss": 0.573, + "slid_loss": 0.6175, + "step": 3954, + "time": 11.47 + }, + { + "epoch": 3.8, + "learning_rate": "1.1657e-04", + "loss": 0.6303, + "slid_loss": 0.617, + "step": 3955, + "time": 13.31 + }, + { + "epoch": 3.8, + "learning_rate": "1.1654e-04", + "loss": 0.6452, + "slid_loss": 0.6176, + "step": 3956, + "time": 13.38 + }, + { + "epoch": 3.8, + "learning_rate": "1.1652e-04", + "loss": 0.551, + "slid_loss": 0.6162, + "step": 3957, + "time": 12.05 + }, + { + "epoch": 3.8, + "learning_rate": "1.1649e-04", + "loss": 0.5349, + "slid_loss": 0.6145, + "step": 3958, + "time": 13.73 + }, + { + "epoch": 3.8, + "learning_rate": "1.1647e-04", + "loss": 0.697, + "slid_loss": 0.6151, + "step": 3959, + "time": 13.55 + }, + { + "epoch": 3.8, + "learning_rate": "1.1644e-04", + "loss": 0.6113, + "slid_loss": 0.6149, + "step": 3960, + "time": 13.88 + }, + { + "epoch": 3.8, + "learning_rate": "1.1642e-04", + "loss": 0.607, + "slid_loss": 0.6144, + "step": 3961, + "time": 12.11 + }, + { + "epoch": 3.81, + "learning_rate": "1.1639e-04", + "loss": 0.6524, + "slid_loss": 0.6143, + "step": 3962, + "time": 13.7 + }, + { + "epoch": 3.81, + "learning_rate": "1.1637e-04", + "loss": 0.6533, + "slid_loss": 0.6138, + "step": 3963, + "time": 11.73 + }, + { + "epoch": 3.81, + "learning_rate": "1.1634e-04", + "loss": 0.5206, + "slid_loss": 0.6123, + "step": 3964, + "time": 13.72 + }, + { + "epoch": 3.81, + "learning_rate": "1.1632e-04", + "loss": 0.5989, + "slid_loss": 0.6124, + "step": 3965, + "time": 13.21 + }, + { + "epoch": 3.81, + "learning_rate": "1.1629e-04", + "loss": 0.5634, + "slid_loss": 0.6117, + "step": 3966, + "time": 11.34 + }, + { + "epoch": 3.81, + "learning_rate": 
"1.1627e-04", + "loss": 0.6446, + "slid_loss": 0.6119, + "step": 3967, + "time": 13.25 + }, + { + "epoch": 3.81, + "learning_rate": "1.1624e-04", + "loss": 0.5039, + "slid_loss": 0.6109, + "step": 3968, + "time": 13.43 + }, + { + "epoch": 3.81, + "learning_rate": "1.1622e-04", + "loss": 0.6873, + "slid_loss": 0.6115, + "step": 3969, + "time": 13.66 + }, + { + "epoch": 3.81, + "learning_rate": "1.1619e-04", + "loss": 0.6717, + "slid_loss": 0.6116, + "step": 3970, + "time": 14.38 + }, + { + "epoch": 3.81, + "learning_rate": "1.1617e-04", + "loss": 0.5589, + "slid_loss": 0.6107, + "step": 3971, + "time": 14.27 + }, + { + "epoch": 3.82, + "learning_rate": "1.1615e-04", + "loss": 0.6608, + "slid_loss": 0.6114, + "step": 3972, + "time": 13.57 + }, + { + "epoch": 3.82, + "learning_rate": "1.1612e-04", + "loss": 0.5427, + "slid_loss": 0.6107, + "step": 3973, + "time": 13.63 + }, + { + "epoch": 3.82, + "learning_rate": "1.1610e-04", + "loss": 0.6012, + "slid_loss": 0.6107, + "step": 3974, + "time": 12.18 + }, + { + "epoch": 3.82, + "learning_rate": "1.1607e-04", + "loss": 0.6929, + "slid_loss": 0.6125, + "step": 3975, + "time": 14.38 + }, + { + "epoch": 3.82, + "learning_rate": "1.1605e-04", + "loss": 0.572, + "slid_loss": 0.6118, + "step": 3976, + "time": 13.57 + }, + { + "epoch": 3.82, + "learning_rate": "1.1602e-04", + "loss": 0.5907, + "slid_loss": 0.6113, + "step": 3977, + "time": 13.2 + }, + { + "epoch": 3.82, + "learning_rate": "1.1600e-04", + "loss": 0.6346, + "slid_loss": 0.611, + "step": 3978, + "time": 12.94 + }, + { + "epoch": 3.82, + "learning_rate": "1.1597e-04", + "loss": 0.6355, + "slid_loss": 0.6105, + "step": 3979, + "time": 13.38 + }, + { + "epoch": 3.82, + "learning_rate": "1.1595e-04", + "loss": 0.6397, + "slid_loss": 0.6108, + "step": 3980, + "time": 13.4 + }, + { + "epoch": 3.82, + "learning_rate": "1.1592e-04", + "loss": 0.5998, + "slid_loss": 0.6108, + "step": 3981, + "time": 13.38 + }, + { + "epoch": 3.83, + "learning_rate": "1.1590e-04", + "loss": 
0.5748, + "slid_loss": 0.61, + "step": 3982, + "time": 13.39 + }, + { + "epoch": 3.83, + "learning_rate": "1.1587e-04", + "loss": 0.6448, + "slid_loss": 0.6099, + "step": 3983, + "time": 13.32 + }, + { + "epoch": 3.83, + "learning_rate": "1.1585e-04", + "loss": 0.6706, + "slid_loss": 0.6107, + "step": 3984, + "time": 13.15 + }, + { + "epoch": 3.83, + "learning_rate": "1.1583e-04", + "loss": 0.5662, + "slid_loss": 0.6094, + "step": 3985, + "time": 14.15 + }, + { + "epoch": 3.83, + "learning_rate": "1.1580e-04", + "loss": 0.6456, + "slid_loss": 0.6095, + "step": 3986, + "time": 13.34 + }, + { + "epoch": 3.83, + "learning_rate": "1.1578e-04", + "loss": 0.6174, + "slid_loss": 0.6092, + "step": 3987, + "time": 13.38 + }, + { + "epoch": 3.83, + "learning_rate": "1.1575e-04", + "loss": 0.5909, + "slid_loss": 0.6085, + "step": 3988, + "time": 12.83 + }, + { + "epoch": 3.83, + "learning_rate": "1.1573e-04", + "loss": 0.5482, + "slid_loss": 0.6065, + "step": 3989, + "time": 13.2 + }, + { + "epoch": 3.83, + "learning_rate": "1.1570e-04", + "loss": 0.5433, + "slid_loss": 0.6061, + "step": 3990, + "time": 13.64 + }, + { + "epoch": 3.83, + "learning_rate": "1.1568e-04", + "loss": 0.6493, + "slid_loss": 0.6064, + "step": 3991, + "time": 13.01 + }, + { + "epoch": 3.83, + "learning_rate": "1.1565e-04", + "loss": 0.6007, + "slid_loss": 0.6055, + "step": 3992, + "time": 13.47 + }, + { + "epoch": 3.84, + "learning_rate": "1.1563e-04", + "loss": 0.5374, + "slid_loss": 0.6043, + "step": 3993, + "time": 13.64 + }, + { + "epoch": 3.84, + "learning_rate": "1.1561e-04", + "loss": 0.6201, + "slid_loss": 0.6047, + "step": 3994, + "time": 12.09 + }, + { + "epoch": 3.84, + "learning_rate": "1.1558e-04", + "loss": 0.6344, + "slid_loss": 0.6051, + "step": 3995, + "time": 12.35 + }, + { + "epoch": 3.84, + "learning_rate": "1.1556e-04", + "loss": 0.6361, + "slid_loss": 0.6059, + "step": 3996, + "time": 13.45 + }, + { + "epoch": 3.84, + "learning_rate": "1.1553e-04", + "loss": 0.5547, + "slid_loss": 
0.6046, + "step": 3997, + "time": 13.43 + }, + { + "epoch": 3.84, + "learning_rate": "1.1551e-04", + "loss": 0.5554, + "slid_loss": 0.6046, + "step": 3998, + "time": 14.22 + }, + { + "epoch": 3.84, + "learning_rate": "1.1548e-04", + "loss": 0.5539, + "slid_loss": 0.604, + "step": 3999, + "time": 13.39 + }, + { + "epoch": 3.84, + "learning_rate": "1.1546e-04", + "loss": 0.5941, + "slid_loss": 0.6036, + "step": 4000, + "time": 13.61 + }, + { + "epoch": 3.84, + "learning_rate": "1.1544e-04", + "loss": 0.5709, + "slid_loss": 0.6027, + "step": 4001, + "time": 13.73 + }, + { + "epoch": 3.84, + "learning_rate": "1.1541e-04", + "loss": 0.6868, + "slid_loss": 0.603, + "step": 4002, + "time": 13.23 + }, + { + "epoch": 3.85, + "learning_rate": "1.1539e-04", + "loss": 0.5791, + "slid_loss": 0.6018, + "step": 4003, + "time": 13.23 + }, + { + "epoch": 3.85, + "learning_rate": "1.1536e-04", + "loss": 0.5463, + "slid_loss": 0.6012, + "step": 4004, + "time": 11.23 + }, + { + "epoch": 3.85, + "learning_rate": "1.1534e-04", + "loss": 0.5575, + "slid_loss": 0.6006, + "step": 4005, + "time": 13.64 + }, + { + "epoch": 3.85, + "learning_rate": "1.1531e-04", + "loss": 0.5689, + "slid_loss": 0.6005, + "step": 4006, + "time": 11.56 + }, + { + "epoch": 3.85, + "learning_rate": "1.1529e-04", + "loss": 0.4924, + "slid_loss": 0.6, + "step": 4007, + "time": 12.04 + }, + { + "epoch": 3.85, + "learning_rate": "1.1527e-04", + "loss": 0.7048, + "slid_loss": 0.6006, + "step": 4008, + "time": 13.35 + }, + { + "epoch": 3.85, + "learning_rate": "1.1524e-04", + "loss": 0.6141, + "slid_loss": 0.6011, + "step": 4009, + "time": 12.82 + }, + { + "epoch": 3.85, + "learning_rate": "1.1522e-04", + "loss": 0.5827, + "slid_loss": 0.6007, + "step": 4010, + "time": 13.26 + }, + { + "epoch": 3.85, + "learning_rate": "1.1519e-04", + "loss": 0.6003, + "slid_loss": 0.6006, + "step": 4011, + "time": 14.16 + }, + { + "epoch": 3.85, + "learning_rate": "1.1517e-04", + "loss": 0.6297, + "slid_loss": 0.6006, + "step": 4012, 
+ "time": 13.15 + }, + { + "epoch": 3.85, + "learning_rate": "1.1515e-04", + "loss": 0.6269, + "slid_loss": 0.6008, + "step": 4013, + "time": 13.77 + }, + { + "epoch": 3.86, + "learning_rate": "1.1512e-04", + "loss": 0.6014, + "slid_loss": 0.6012, + "step": 4014, + "time": 13.13 + }, + { + "epoch": 3.86, + "learning_rate": "1.1510e-04", + "loss": 0.6625, + "slid_loss": 0.6021, + "step": 4015, + "time": 11.9 + }, + { + "epoch": 3.86, + "learning_rate": "1.1507e-04", + "loss": 0.6091, + "slid_loss": 0.6022, + "step": 4016, + "time": 13.21 + }, + { + "epoch": 3.86, + "learning_rate": "1.1505e-04", + "loss": 0.5482, + "slid_loss": 0.6009, + "step": 4017, + "time": 11.78 + }, + { + "epoch": 3.86, + "learning_rate": "1.1503e-04", + "loss": 0.5717, + "slid_loss": 0.6008, + "step": 4018, + "time": 13.78 + }, + { + "epoch": 3.86, + "learning_rate": "1.1500e-04", + "loss": 0.5935, + "slid_loss": 0.6006, + "step": 4019, + "time": 11.38 + }, + { + "epoch": 3.86, + "learning_rate": "1.1498e-04", + "loss": 0.5715, + "slid_loss": 0.5997, + "step": 4020, + "time": 13.87 + }, + { + "epoch": 3.86, + "learning_rate": "1.1495e-04", + "loss": 0.5362, + "slid_loss": 0.5987, + "step": 4021, + "time": 12.77 + }, + { + "epoch": 3.86, + "learning_rate": "1.1493e-04", + "loss": 0.5917, + "slid_loss": 0.5985, + "step": 4022, + "time": 12.45 + }, + { + "epoch": 3.86, + "learning_rate": "1.1491e-04", + "loss": 0.6247, + "slid_loss": 0.5991, + "step": 4023, + "time": 13.97 + }, + { + "epoch": 3.87, + "learning_rate": "1.1488e-04", + "loss": 0.6118, + "slid_loss": 0.5993, + "step": 4024, + "time": 11.36 + }, + { + "epoch": 3.87, + "learning_rate": "1.1486e-04", + "loss": 0.5927, + "slid_loss": 0.5987, + "step": 4025, + "time": 11.69 + }, + { + "epoch": 3.87, + "learning_rate": "1.1483e-04", + "loss": 0.5684, + "slid_loss": 0.5986, + "step": 4026, + "time": 13.57 + }, + { + "epoch": 3.87, + "learning_rate": "1.1481e-04", + "loss": 0.6039, + "slid_loss": 0.5983, + "step": 4027, + "time": 13.81 + }, 
+ { + "epoch": 3.87, + "learning_rate": "1.1479e-04", + "loss": 0.5953, + "slid_loss": 0.5979, + "step": 4028, + "time": 11.92 + }, + { + "epoch": 3.87, + "learning_rate": "1.1476e-04", + "loss": 0.6248, + "slid_loss": 0.5983, + "step": 4029, + "time": 13.69 + }, + { + "epoch": 3.87, + "learning_rate": "1.1474e-04", + "loss": 0.5763, + "slid_loss": 0.5992, + "step": 4030, + "time": 11.51 + }, + { + "epoch": 3.87, + "learning_rate": "1.1472e-04", + "loss": 0.6379, + "slid_loss": 0.5986, + "step": 4031, + "time": 13.17 + }, + { + "epoch": 3.87, + "learning_rate": "1.1469e-04", + "loss": 0.6987, + "slid_loss": 0.599, + "step": 4032, + "time": 14.07 + }, + { + "epoch": 3.87, + "learning_rate": "1.1467e-04", + "loss": 0.5976, + "slid_loss": 0.5985, + "step": 4033, + "time": 14.13 + }, + { + "epoch": 3.88, + "learning_rate": "1.1464e-04", + "loss": 0.4649, + "slid_loss": 0.5974, + "step": 4034, + "time": 13.26 + }, + { + "epoch": 3.88, + "learning_rate": "1.1462e-04", + "loss": 0.6506, + "slid_loss": 0.5983, + "step": 4035, + "time": 11.66 + }, + { + "epoch": 3.88, + "learning_rate": "1.1460e-04", + "loss": 0.6715, + "slid_loss": 0.5986, + "step": 4036, + "time": 14.48 + }, + { + "epoch": 3.88, + "learning_rate": "1.1457e-04", + "loss": 0.6134, + "slid_loss": 0.598, + "step": 4037, + "time": 11.9 + }, + { + "epoch": 3.88, + "learning_rate": "1.1455e-04", + "loss": 0.6169, + "slid_loss": 0.598, + "step": 4038, + "time": 11.76 + }, + { + "epoch": 3.88, + "learning_rate": "1.1453e-04", + "loss": 0.5726, + "slid_loss": 0.5979, + "step": 4039, + "time": 11.18 + }, + { + "epoch": 3.88, + "learning_rate": "1.1450e-04", + "loss": 0.6319, + "slid_loss": 0.5991, + "step": 4040, + "time": 14.2 + }, + { + "epoch": 3.88, + "learning_rate": "1.1448e-04", + "loss": 0.595, + "slid_loss": 0.5997, + "step": 4041, + "time": 12.03 + }, + { + "epoch": 3.88, + "learning_rate": "1.1446e-04", + "loss": 0.5861, + "slid_loss": 0.5996, + "step": 4042, + "time": 13.66 + }, + { + "epoch": 3.88, + 
"learning_rate": "1.1443e-04", + "loss": 0.5523, + "slid_loss": 0.5997, + "step": 4043, + "time": 13.19 + }, + { + "epoch": 3.88, + "learning_rate": "1.1441e-04", + "loss": 0.5871, + "slid_loss": 0.5992, + "step": 4044, + "time": 12.68 + }, + { + "epoch": 3.89, + "learning_rate": "1.1438e-04", + "loss": 0.5741, + "slid_loss": 0.5994, + "step": 4045, + "time": 12.74 + }, + { + "epoch": 3.89, + "learning_rate": "1.1436e-04", + "loss": 0.6942, + "slid_loss": 0.6006, + "step": 4046, + "time": 13.95 + }, + { + "epoch": 3.89, + "learning_rate": "1.1434e-04", + "loss": 0.5799, + "slid_loss": 0.5995, + "step": 4047, + "time": 13.04 + }, + { + "epoch": 3.89, + "learning_rate": "1.1431e-04", + "loss": 0.6397, + "slid_loss": 0.6007, + "step": 4048, + "time": 10.81 + }, + { + "epoch": 3.89, + "learning_rate": "1.1429e-04", + "loss": 0.6026, + "slid_loss": 0.6009, + "step": 4049, + "time": 13.88 + }, + { + "epoch": 3.89, + "learning_rate": "1.1427e-04", + "loss": 0.5622, + "slid_loss": 0.6006, + "step": 4050, + "time": 13.3 + }, + { + "epoch": 3.89, + "learning_rate": "1.1424e-04", + "loss": 0.6563, + "slid_loss": 0.6015, + "step": 4051, + "time": 14.0 + }, + { + "epoch": 3.89, + "learning_rate": "1.1422e-04", + "loss": 0.5384, + "slid_loss": 0.6014, + "step": 4052, + "time": 12.55 + }, + { + "epoch": 3.89, + "learning_rate": "1.1420e-04", + "loss": 0.6316, + "slid_loss": 0.6021, + "step": 4053, + "time": 13.21 + }, + { + "epoch": 3.89, + "learning_rate": "1.1417e-04", + "loss": 0.5316, + "slid_loss": 0.6017, + "step": 4054, + "time": 12.44 + }, + { + "epoch": 3.9, + "learning_rate": "1.1415e-04", + "loss": 0.6343, + "slid_loss": 0.6018, + "step": 4055, + "time": 13.41 + }, + { + "epoch": 3.9, + "learning_rate": "1.1413e-04", + "loss": 0.4908, + "slid_loss": 0.6002, + "step": 4056, + "time": 13.5 + }, + { + "epoch": 3.9, + "learning_rate": "1.1410e-04", + "loss": 0.6749, + "slid_loss": 0.6014, + "step": 4057, + "time": 13.78 + }, + { + "epoch": 3.9, + "learning_rate": 
"1.1408e-04", + "loss": 0.6145, + "slid_loss": 0.6022, + "step": 4058, + "time": 13.94 + }, + { + "epoch": 3.9, + "learning_rate": "1.1406e-04", + "loss": 0.5594, + "slid_loss": 0.6009, + "step": 4059, + "time": 10.98 + }, + { + "epoch": 3.9, + "learning_rate": "1.1403e-04", + "loss": 0.614, + "slid_loss": 0.6009, + "step": 4060, + "time": 13.57 + }, + { + "epoch": 3.9, + "learning_rate": "1.1401e-04", + "loss": 0.5688, + "slid_loss": 0.6005, + "step": 4061, + "time": 11.28 + }, + { + "epoch": 3.9, + "learning_rate": "1.1399e-04", + "loss": 0.6423, + "slid_loss": 0.6004, + "step": 4062, + "time": 13.02 + }, + { + "epoch": 3.9, + "learning_rate": "1.1396e-04", + "loss": 0.5354, + "slid_loss": 0.5992, + "step": 4063, + "time": 12.79 + }, + { + "epoch": 3.9, + "learning_rate": "1.1394e-04", + "loss": 0.6376, + "slid_loss": 0.6004, + "step": 4064, + "time": 11.44 + }, + { + "epoch": 3.9, + "learning_rate": "1.1392e-04", + "loss": 0.6201, + "slid_loss": 0.6006, + "step": 4065, + "time": 13.74 + }, + { + "epoch": 3.91, + "learning_rate": "1.1389e-04", + "loss": 0.611, + "slid_loss": 0.6011, + "step": 4066, + "time": 13.34 + }, + { + "epoch": 3.91, + "learning_rate": "1.1387e-04", + "loss": 0.592, + "slid_loss": 0.6006, + "step": 4067, + "time": 13.88 + }, + { + "epoch": 3.91, + "learning_rate": "1.1385e-04", + "loss": 0.6536, + "slid_loss": 0.6021, + "step": 4068, + "time": 12.28 + }, + { + "epoch": 3.91, + "learning_rate": "1.1382e-04", + "loss": 0.5569, + "slid_loss": 0.6008, + "step": 4069, + "time": 12.31 + }, + { + "epoch": 3.91, + "learning_rate": "1.1380e-04", + "loss": 0.5879, + "slid_loss": 0.5999, + "step": 4070, + "time": 13.76 + }, + { + "epoch": 3.91, + "learning_rate": "1.1378e-04", + "loss": 0.5516, + "slid_loss": 0.5998, + "step": 4071, + "time": 14.0 + }, + { + "epoch": 3.91, + "learning_rate": "1.1376e-04", + "loss": 0.5705, + "slid_loss": 0.5989, + "step": 4072, + "time": 12.19 + }, + { + "epoch": 3.91, + "learning_rate": "1.1373e-04", + "loss": 
0.5519, + "slid_loss": 0.599, + "step": 4073, + "time": 13.78 + }, + { + "epoch": 3.91, + "learning_rate": "1.1371e-04", + "loss": 0.6779, + "slid_loss": 0.5998, + "step": 4074, + "time": 14.07 + }, + { + "epoch": 3.91, + "learning_rate": "1.1369e-04", + "loss": 0.5639, + "slid_loss": 0.5985, + "step": 4075, + "time": 12.84 + }, + { + "epoch": 3.92, + "learning_rate": "1.1366e-04", + "loss": 0.628, + "slid_loss": 0.5991, + "step": 4076, + "time": 12.76 + }, + { + "epoch": 3.92, + "learning_rate": "1.1364e-04", + "loss": 0.535, + "slid_loss": 0.5985, + "step": 4077, + "time": 11.41 + }, + { + "epoch": 3.92, + "learning_rate": "1.1362e-04", + "loss": 0.5914, + "slid_loss": 0.5981, + "step": 4078, + "time": 12.02 + }, + { + "epoch": 3.92, + "learning_rate": "1.1359e-04", + "loss": 0.6155, + "slid_loss": 0.5979, + "step": 4079, + "time": 12.76 + }, + { + "epoch": 3.92, + "learning_rate": "1.1357e-04", + "loss": 0.5951, + "slid_loss": 0.5974, + "step": 4080, + "time": 12.0 + }, + { + "epoch": 3.92, + "learning_rate": "1.1355e-04", + "loss": 0.6527, + "slid_loss": 0.598, + "step": 4081, + "time": 13.89 + }, + { + "epoch": 3.92, + "learning_rate": "1.1352e-04", + "loss": 0.6046, + "slid_loss": 0.5983, + "step": 4082, + "time": 13.33 + }, + { + "epoch": 3.92, + "learning_rate": "1.1350e-04", + "loss": 0.6154, + "slid_loss": 0.598, + "step": 4083, + "time": 13.41 + }, + { + "epoch": 3.92, + "learning_rate": "1.1348e-04", + "loss": 0.6121, + "slid_loss": 0.5974, + "step": 4084, + "time": 13.5 + }, + { + "epoch": 3.92, + "learning_rate": "1.1346e-04", + "loss": 0.6485, + "slid_loss": 0.5982, + "step": 4085, + "time": 11.67 + }, + { + "epoch": 3.93, + "learning_rate": "1.1343e-04", + "loss": 0.7311, + "slid_loss": 0.5991, + "step": 4086, + "time": 13.22 + }, + { + "epoch": 3.93, + "learning_rate": "1.1341e-04", + "loss": 0.5099, + "slid_loss": 0.598, + "step": 4087, + "time": 13.43 + }, + { + "epoch": 3.93, + "learning_rate": "1.1339e-04", + "loss": 0.6362, + "slid_loss": 
0.5984, + "step": 4088, + "time": 13.43 + }, + { + "epoch": 3.93, + "learning_rate": "1.1336e-04", + "loss": 0.5041, + "slid_loss": 0.598, + "step": 4089, + "time": 12.98 + }, + { + "epoch": 3.93, + "learning_rate": "1.1334e-04", + "loss": 0.5043, + "slid_loss": 0.5976, + "step": 4090, + "time": 13.16 + }, + { + "epoch": 3.93, + "learning_rate": "1.1332e-04", + "loss": 0.573, + "slid_loss": 0.5968, + "step": 4091, + "time": 11.11 + }, + { + "epoch": 3.93, + "learning_rate": "1.1330e-04", + "loss": 0.6261, + "slid_loss": 0.5971, + "step": 4092, + "time": 13.3 + }, + { + "epoch": 3.93, + "learning_rate": "1.1327e-04", + "loss": 0.6232, + "slid_loss": 0.598, + "step": 4093, + "time": 13.0 + }, + { + "epoch": 3.93, + "learning_rate": "1.1325e-04", + "loss": 0.558, + "slid_loss": 0.5973, + "step": 4094, + "time": 13.83 + }, + { + "epoch": 3.93, + "learning_rate": "1.1323e-04", + "loss": 0.6123, + "slid_loss": 0.5971, + "step": 4095, + "time": 12.11 + }, + { + "epoch": 3.93, + "learning_rate": "1.1321e-04", + "loss": 0.6046, + "slid_loss": 0.5968, + "step": 4096, + "time": 13.82 + }, + { + "epoch": 3.94, + "learning_rate": "1.1318e-04", + "loss": 0.6053, + "slid_loss": 0.5973, + "step": 4097, + "time": 12.94 + }, + { + "epoch": 3.94, + "learning_rate": "1.1316e-04", + "loss": 0.6471, + "slid_loss": 0.5982, + "step": 4098, + "time": 12.94 + }, + { + "epoch": 3.94, + "learning_rate": "1.1314e-04", + "loss": 0.7122, + "slid_loss": 0.5998, + "step": 4099, + "time": 13.16 + }, + { + "epoch": 3.94, + "learning_rate": "1.1311e-04", + "loss": 0.5361, + "slid_loss": 0.5992, + "step": 4100, + "time": 13.48 + }, + { + "epoch": 3.94, + "learning_rate": "1.1309e-04", + "loss": 0.5857, + "slid_loss": 0.5994, + "step": 4101, + "time": 13.69 + }, + { + "epoch": 3.94, + "learning_rate": "1.1307e-04", + "loss": 0.5808, + "slid_loss": 0.5983, + "step": 4102, + "time": 13.3 + }, + { + "epoch": 3.94, + "learning_rate": "1.1305e-04", + "loss": 0.5174, + "slid_loss": 0.5977, + "step": 4103, + 
"time": 13.38 + }, + { + "epoch": 3.94, + "learning_rate": "1.1302e-04", + "loss": 0.5903, + "slid_loss": 0.5981, + "step": 4104, + "time": 12.98 + }, + { + "epoch": 3.94, + "learning_rate": "1.1300e-04", + "loss": 0.7024, + "slid_loss": 0.5996, + "step": 4105, + "time": 14.31 + }, + { + "epoch": 3.94, + "learning_rate": "1.1298e-04", + "loss": 0.5825, + "slid_loss": 0.5997, + "step": 4106, + "time": 12.37 + }, + { + "epoch": 3.95, + "learning_rate": "1.1296e-04", + "loss": 0.667, + "slid_loss": 0.6015, + "step": 4107, + "time": 12.83 + }, + { + "epoch": 3.95, + "learning_rate": "1.1293e-04", + "loss": 0.5489, + "slid_loss": 0.5999, + "step": 4108, + "time": 11.89 + }, + { + "epoch": 3.95, + "learning_rate": "1.1291e-04", + "loss": 0.5863, + "slid_loss": 0.5996, + "step": 4109, + "time": 14.33 + }, + { + "epoch": 3.95, + "learning_rate": "1.1289e-04", + "loss": 0.5454, + "slid_loss": 0.5993, + "step": 4110, + "time": 11.3 + }, + { + "epoch": 3.95, + "learning_rate": "1.1287e-04", + "loss": 0.5276, + "slid_loss": 0.5985, + "step": 4111, + "time": 14.07 + }, + { + "epoch": 3.95, + "learning_rate": "1.1284e-04", + "loss": 0.577, + "slid_loss": 0.598, + "step": 4112, + "time": 10.8 + }, + { + "epoch": 3.95, + "learning_rate": "1.1282e-04", + "loss": 0.6569, + "slid_loss": 0.5983, + "step": 4113, + "time": 13.49 + }, + { + "epoch": 3.95, + "learning_rate": "1.1280e-04", + "loss": 0.6451, + "slid_loss": 0.5987, + "step": 4114, + "time": 12.76 + }, + { + "epoch": 3.95, + "learning_rate": "1.1278e-04", + "loss": 0.6669, + "slid_loss": 0.5988, + "step": 4115, + "time": 11.56 + }, + { + "epoch": 3.95, + "learning_rate": "1.1275e-04", + "loss": 0.6029, + "slid_loss": 0.5987, + "step": 4116, + "time": 13.95 + }, + { + "epoch": 3.95, + "learning_rate": "1.1273e-04", + "loss": 0.5764, + "slid_loss": 0.599, + "step": 4117, + "time": 12.62 + }, + { + "epoch": 3.96, + "learning_rate": "1.1271e-04", + "loss": 0.6039, + "slid_loss": 0.5993, + "step": 4118, + "time": 12.97 + }, + { + 
"epoch": 3.96, + "learning_rate": "1.1269e-04", + "loss": 0.5623, + "slid_loss": 0.599, + "step": 4119, + "time": 12.05 + }, + { + "epoch": 3.96, + "learning_rate": "1.1267e-04", + "loss": 0.5828, + "slid_loss": 0.5991, + "step": 4120, + "time": 13.96 + }, + { + "epoch": 3.96, + "learning_rate": "1.1264e-04", + "loss": 0.6189, + "slid_loss": 0.6, + "step": 4121, + "time": 13.92 + }, + { + "epoch": 3.96, + "learning_rate": "1.1262e-04", + "loss": 0.5815, + "slid_loss": 0.5999, + "step": 4122, + "time": 13.34 + }, + { + "epoch": 3.96, + "learning_rate": "1.1260e-04", + "loss": 0.6288, + "slid_loss": 0.5999, + "step": 4123, + "time": 12.48 + }, + { + "epoch": 3.96, + "learning_rate": "1.1258e-04", + "loss": 0.5672, + "slid_loss": 0.5994, + "step": 4124, + "time": 11.79 + }, + { + "epoch": 3.96, + "learning_rate": "1.1255e-04", + "loss": 0.5078, + "slid_loss": 0.5986, + "step": 4125, + "time": 11.44 + }, + { + "epoch": 3.96, + "learning_rate": "1.1253e-04", + "loss": 0.6021, + "slid_loss": 0.5989, + "step": 4126, + "time": 13.35 + }, + { + "epoch": 3.96, + "learning_rate": "1.1251e-04", + "loss": 0.6428, + "slid_loss": 0.5993, + "step": 4127, + "time": 12.83 + }, + { + "epoch": 3.97, + "learning_rate": "1.1249e-04", + "loss": 0.5662, + "slid_loss": 0.599, + "step": 4128, + "time": 13.88 + }, + { + "epoch": 3.97, + "learning_rate": "1.1247e-04", + "loss": 0.6131, + "slid_loss": 0.5989, + "step": 4129, + "time": 13.86 + }, + { + "epoch": 3.97, + "learning_rate": "1.1244e-04", + "loss": 0.5741, + "slid_loss": 0.5989, + "step": 4130, + "time": 13.22 + }, + { + "epoch": 3.97, + "learning_rate": "1.1242e-04", + "loss": 0.6084, + "slid_loss": 0.5986, + "step": 4131, + "time": 12.92 + }, + { + "epoch": 3.97, + "learning_rate": "1.1240e-04", + "loss": 0.5803, + "slid_loss": 0.5974, + "step": 4132, + "time": 12.84 + }, + { + "epoch": 3.97, + "learning_rate": "1.1238e-04", + "loss": 0.6413, + "slid_loss": 0.5979, + "step": 4133, + "time": 13.6 + }, + { + "epoch": 3.97, + 
"learning_rate": "1.1235e-04", + "loss": 0.4811, + "slid_loss": 0.598, + "step": 4134, + "time": 13.97 + }, + { + "epoch": 3.97, + "learning_rate": "1.1233e-04", + "loss": 0.5966, + "slid_loss": 0.5975, + "step": 4135, + "time": 12.97 + }, + { + "epoch": 3.97, + "learning_rate": "1.1231e-04", + "loss": 0.5623, + "slid_loss": 0.5964, + "step": 4136, + "time": 12.17 + }, + { + "epoch": 3.97, + "learning_rate": "1.1229e-04", + "loss": 0.5928, + "slid_loss": 0.5962, + "step": 4137, + "time": 11.16 + }, + { + "epoch": 3.98, + "learning_rate": "1.1227e-04", + "loss": 0.5565, + "slid_loss": 0.5956, + "step": 4138, + "time": 13.58 + }, + { + "epoch": 3.98, + "learning_rate": "1.1224e-04", + "loss": 0.5619, + "slid_loss": 0.5955, + "step": 4139, + "time": 12.8 + }, + { + "epoch": 3.98, + "learning_rate": "1.1222e-04", + "loss": 0.5439, + "slid_loss": 0.5946, + "step": 4140, + "time": 11.06 + }, + { + "epoch": 3.98, + "learning_rate": "1.1220e-04", + "loss": 0.6738, + "slid_loss": 0.5954, + "step": 4141, + "time": 14.75 + }, + { + "epoch": 3.98, + "learning_rate": "1.1218e-04", + "loss": 0.5953, + "slid_loss": 0.5955, + "step": 4142, + "time": 12.96 + }, + { + "epoch": 3.98, + "learning_rate": "1.1216e-04", + "loss": 0.6102, + "slid_loss": 0.596, + "step": 4143, + "time": 12.82 + }, + { + "epoch": 3.98, + "learning_rate": "1.1213e-04", + "loss": 0.5712, + "slid_loss": 0.5959, + "step": 4144, + "time": 12.95 + }, + { + "epoch": 3.98, + "learning_rate": "1.1211e-04", + "loss": 0.5446, + "slid_loss": 0.5956, + "step": 4145, + "time": 12.33 + }, + { + "epoch": 3.98, + "learning_rate": "1.1209e-04", + "loss": 0.5269, + "slid_loss": 0.5939, + "step": 4146, + "time": 13.82 + }, + { + "epoch": 3.98, + "learning_rate": "1.1207e-04", + "loss": 0.5192, + "slid_loss": 0.5933, + "step": 4147, + "time": 13.58 + }, + { + "epoch": 3.98, + "learning_rate": "1.1205e-04", + "loss": 0.5593, + "slid_loss": 0.5925, + "step": 4148, + "time": 12.82 + }, + { + "epoch": 3.99, + "learning_rate": 
"1.1203e-04", + "loss": 0.5058, + "slid_loss": 0.5915, + "step": 4149, + "time": 14.27 + }, + { + "epoch": 3.99, + "learning_rate": "1.1200e-04", + "loss": 0.5501, + "slid_loss": 0.5914, + "step": 4150, + "time": 11.06 + }, + { + "epoch": 3.99, + "learning_rate": "1.1198e-04", + "loss": 0.5322, + "slid_loss": 0.5902, + "step": 4151, + "time": 13.25 + }, + { + "epoch": 3.99, + "learning_rate": "1.1196e-04", + "loss": 0.56, + "slid_loss": 0.5904, + "step": 4152, + "time": 11.84 + }, + { + "epoch": 3.99, + "learning_rate": "1.1194e-04", + "loss": 0.5916, + "slid_loss": 0.59, + "step": 4153, + "time": 13.29 + }, + { + "epoch": 3.99, + "learning_rate": "1.1192e-04", + "loss": 0.5878, + "slid_loss": 0.5906, + "step": 4154, + "time": 13.22 + }, + { + "epoch": 3.99, + "learning_rate": "1.1189e-04", + "loss": 0.6276, + "slid_loss": 0.5905, + "step": 4155, + "time": 13.21 + }, + { + "epoch": 3.99, + "learning_rate": "1.1187e-04", + "loss": 0.6033, + "slid_loss": 0.5916, + "step": 4156, + "time": 11.38 + }, + { + "epoch": 3.99, + "learning_rate": "1.1185e-04", + "loss": 0.7522, + "slid_loss": 0.5924, + "step": 4157, + "time": 12.83 + }, + { + "epoch": 3.99, + "learning_rate": "1.1183e-04", + "loss": 0.6984, + "slid_loss": 0.5932, + "step": 4158, + "time": 12.86 + }, + { + "epoch": 4.0, + "learning_rate": "1.1181e-04", + "loss": 0.5446, + "slid_loss": 0.5931, + "step": 4159, + "time": 13.35 + }, + { + "epoch": 4.0, + "learning_rate": "1.1179e-04", + "loss": 0.4993, + "slid_loss": 0.5919, + "step": 4160, + "time": 11.56 + }, + { + "epoch": 4.0, + "learning_rate": "1.1176e-04", + "loss": 0.545, + "slid_loss": 0.5917, + "step": 4161, + "time": 13.23 + }, + { + "epoch": 4.0, + "learning_rate": "1.1174e-04", + "loss": 0.6502, + "slid_loss": 0.5918, + "step": 4162, + "time": 13.54 + }, + { + "epoch": 4.0, + "learning_rate": "1.1172e-04", + "loss": 0.5561, + "slid_loss": 0.592, + "step": 4163, + "time": 12.29 + }, + { + "epoch": 4.0, + "learning_rate": "1.1170e-04", + "loss": 0.603, 
+ "slid_loss": 0.5916, + "step": 4164, + "time": 13.89 + }, + { + "epoch": 4.0, + "learning_rate": "1.1168e-04", + "loss": 0.5518, + "slid_loss": 0.5909, + "step": 4165, + "time": 166.37 + }, + { + "epoch": 4.0, + "learning_rate": "1.1166e-04", + "loss": 0.5478, + "slid_loss": 0.5903, + "step": 4166, + "time": 13.31 + }, + { + "epoch": 4.0, + "learning_rate": "1.1164e-04", + "loss": 0.6197, + "slid_loss": 0.5906, + "step": 4167, + "time": 13.03 + }, + { + "epoch": 4.0, + "learning_rate": "1.1161e-04", + "loss": 0.6394, + "slid_loss": 0.5905, + "step": 4168, + "time": 12.87 + }, + { + "epoch": 4.0, + "learning_rate": "1.1159e-04", + "loss": 0.6264, + "slid_loss": 0.5911, + "step": 4169, + "time": 13.5 + }, + { + "epoch": 4.01, + "learning_rate": "1.1157e-04", + "loss": 0.5409, + "slid_loss": 0.5907, + "step": 4170, + "time": 13.29 + }, + { + "epoch": 4.01, + "learning_rate": "1.1155e-04", + "loss": 0.6208, + "slid_loss": 0.5914, + "step": 4171, + "time": 12.31 + }, + { + "epoch": 4.01, + "learning_rate": "1.1153e-04", + "loss": 0.6154, + "slid_loss": 0.5918, + "step": 4172, + "time": 13.68 + }, + { + "epoch": 4.01, + "learning_rate": "1.1151e-04", + "loss": 0.5871, + "slid_loss": 0.5922, + "step": 4173, + "time": 13.25 + }, + { + "epoch": 4.01, + "learning_rate": "1.1149e-04", + "loss": 0.5877, + "slid_loss": 0.5913, + "step": 4174, + "time": 14.14 + }, + { + "epoch": 4.01, + "learning_rate": "1.1146e-04", + "loss": 0.5997, + "slid_loss": 0.5916, + "step": 4175, + "time": 13.98 + }, + { + "epoch": 4.01, + "learning_rate": "1.1144e-04", + "loss": 0.6019, + "slid_loss": 0.5914, + "step": 4176, + "time": 13.38 + }, + { + "epoch": 4.01, + "learning_rate": "1.1142e-04", + "loss": 0.572, + "slid_loss": 0.5917, + "step": 4177, + "time": 14.36 + }, + { + "epoch": 4.01, + "learning_rate": "1.1140e-04", + "loss": 0.5763, + "slid_loss": 0.5916, + "step": 4178, + "time": 13.37 + }, + { + "epoch": 4.01, + "learning_rate": "1.1138e-04", + "loss": 0.5558, + "slid_loss": 0.591, + 
"step": 4179, + "time": 11.18 + }, + { + "epoch": 4.02, + "learning_rate": "1.1136e-04", + "loss": 0.5822, + "slid_loss": 0.5909, + "step": 4180, + "time": 13.22 + }, + { + "epoch": 4.02, + "learning_rate": "1.1134e-04", + "loss": 0.5603, + "slid_loss": 0.5899, + "step": 4181, + "time": 11.43 + }, + { + "epoch": 4.02, + "learning_rate": "1.1131e-04", + "loss": 0.5613, + "slid_loss": 0.5895, + "step": 4182, + "time": 10.76 + }, + { + "epoch": 4.02, + "learning_rate": "1.1129e-04", + "loss": 0.5206, + "slid_loss": 0.5886, + "step": 4183, + "time": 11.39 + }, + { + "epoch": 4.02, + "learning_rate": "1.1127e-04", + "loss": 0.579, + "slid_loss": 0.5882, + "step": 4184, + "time": 10.63 + }, + { + "epoch": 4.02, + "learning_rate": "1.1125e-04", + "loss": 0.6052, + "slid_loss": 0.5878, + "step": 4185, + "time": 13.76 + }, + { + "epoch": 4.02, + "learning_rate": "1.1123e-04", + "loss": 0.4844, + "slid_loss": 0.5853, + "step": 4186, + "time": 13.41 + }, + { + "epoch": 4.02, + "learning_rate": "1.1121e-04", + "loss": 0.5044, + "slid_loss": 0.5853, + "step": 4187, + "time": 13.6 + }, + { + "epoch": 4.02, + "learning_rate": "1.1119e-04", + "loss": 0.5329, + "slid_loss": 0.5842, + "step": 4188, + "time": 13.16 + }, + { + "epoch": 4.02, + "learning_rate": "1.1117e-04", + "loss": 0.6431, + "slid_loss": 0.5856, + "step": 4189, + "time": 13.4 + }, + { + "epoch": 4.02, + "learning_rate": "1.1115e-04", + "loss": 0.5438, + "slid_loss": 0.586, + "step": 4190, + "time": 11.22 + }, + { + "epoch": 4.03, + "learning_rate": "1.1112e-04", + "loss": 0.5815, + "slid_loss": 0.5861, + "step": 4191, + "time": 11.75 + }, + { + "epoch": 4.03, + "learning_rate": "1.1110e-04", + "loss": 0.7033, + "slid_loss": 0.5869, + "step": 4192, + "time": 12.86 + }, + { + "epoch": 4.03, + "learning_rate": "1.1108e-04", + "loss": 0.6499, + "slid_loss": 0.5871, + "step": 4193, + "time": 13.45 + }, + { + "epoch": 4.03, + "learning_rate": "1.1106e-04", + "loss": 0.5641, + "slid_loss": 0.5872, + "step": 4194, + "time": 
12.84 + }, + { + "epoch": 4.03, + "learning_rate": "1.1104e-04", + "loss": 0.5623, + "slid_loss": 0.5867, + "step": 4195, + "time": 12.8 + }, + { + "epoch": 4.03, + "learning_rate": "1.1102e-04", + "loss": 0.5654, + "slid_loss": 0.5863, + "step": 4196, + "time": 13.44 + }, + { + "epoch": 4.03, + "learning_rate": "1.1100e-04", + "loss": 0.5869, + "slid_loss": 0.5861, + "step": 4197, + "time": 13.62 + }, + { + "epoch": 4.03, + "learning_rate": "1.1098e-04", + "loss": 0.56, + "slid_loss": 0.5853, + "step": 4198, + "time": 13.67 + }, + { + "epoch": 4.03, + "learning_rate": "1.1096e-04", + "loss": 0.5671, + "slid_loss": 0.5838, + "step": 4199, + "time": 12.09 + }, + { + "epoch": 4.03, + "learning_rate": "1.1094e-04", + "loss": 0.5913, + "slid_loss": 0.5844, + "step": 4200, + "time": 13.26 + }, + { + "epoch": 4.04, + "learning_rate": "1.1091e-04", + "loss": 0.5586, + "slid_loss": 0.5841, + "step": 4201, + "time": 13.33 + }, + { + "epoch": 4.04, + "learning_rate": "1.1089e-04", + "loss": 0.5594, + "slid_loss": 0.5839, + "step": 4202, + "time": 12.82 + }, + { + "epoch": 4.04, + "learning_rate": "1.1087e-04", + "loss": 0.7161, + "slid_loss": 0.5859, + "step": 4203, + "time": 12.74 + }, + { + "epoch": 4.04, + "learning_rate": "1.1085e-04", + "loss": 0.5381, + "slid_loss": 0.5853, + "step": 4204, + "time": 13.36 + }, + { + "epoch": 4.04, + "learning_rate": "1.1083e-04", + "loss": 0.4979, + "slid_loss": 0.5833, + "step": 4205, + "time": 13.29 + }, + { + "epoch": 4.04, + "learning_rate": "1.1081e-04", + "loss": 0.6594, + "slid_loss": 0.5841, + "step": 4206, + "time": 11.31 + }, + { + "epoch": 4.04, + "learning_rate": "1.1079e-04", + "loss": 0.6511, + "slid_loss": 0.5839, + "step": 4207, + "time": 13.18 + }, + { + "epoch": 4.04, + "learning_rate": "1.1077e-04", + "loss": 0.5611, + "slid_loss": 0.584, + "step": 4208, + "time": 12.94 + }, + { + "epoch": 4.04, + "learning_rate": "1.1075e-04", + "loss": 0.5947, + "slid_loss": 0.5841, + "step": 4209, + "time": 13.31 + }, + { + 
"epoch": 4.04, + "learning_rate": "1.1073e-04", + "loss": 0.6093, + "slid_loss": 0.5847, + "step": 4210, + "time": 11.37 + }, + { + "epoch": 4.05, + "learning_rate": "1.1071e-04", + "loss": 0.6287, + "slid_loss": 0.5858, + "step": 4211, + "time": 12.92 + }, + { + "epoch": 4.05, + "learning_rate": "1.1069e-04", + "loss": 0.4982, + "slid_loss": 0.585, + "step": 4212, + "time": 11.82 + }, + { + "epoch": 4.05, + "learning_rate": "1.1066e-04", + "loss": 0.5714, + "slid_loss": 0.5841, + "step": 4213, + "time": 12.31 + }, + { + "epoch": 4.05, + "learning_rate": "1.1064e-04", + "loss": 0.5676, + "slid_loss": 0.5833, + "step": 4214, + "time": 14.13 + }, + { + "epoch": 4.05, + "learning_rate": "1.1062e-04", + "loss": 0.6607, + "slid_loss": 0.5833, + "step": 4215, + "time": 13.57 + }, + { + "epoch": 4.05, + "learning_rate": "1.1060e-04", + "loss": 0.6135, + "slid_loss": 0.5834, + "step": 4216, + "time": 13.0 + }, + { + "epoch": 4.05, + "learning_rate": "1.1058e-04", + "loss": 0.558, + "slid_loss": 0.5832, + "step": 4217, + "time": 11.59 + }, + { + "epoch": 4.05, + "learning_rate": "1.1056e-04", + "loss": 0.6425, + "slid_loss": 0.5836, + "step": 4218, + "time": 13.24 + }, + { + "epoch": 4.05, + "learning_rate": "1.1054e-04", + "loss": 0.635, + "slid_loss": 0.5843, + "step": 4219, + "time": 13.17 + }, + { + "epoch": 4.05, + "learning_rate": "1.1052e-04", + "loss": 0.5779, + "slid_loss": 0.5843, + "step": 4220, + "time": 12.64 + }, + { + "epoch": 4.05, + "learning_rate": "1.1050e-04", + "loss": 0.6459, + "slid_loss": 0.5845, + "step": 4221, + "time": 12.67 + }, + { + "epoch": 4.06, + "learning_rate": "1.1048e-04", + "loss": 0.5024, + "slid_loss": 0.5837, + "step": 4222, + "time": 13.71 + }, + { + "epoch": 4.06, + "learning_rate": "1.1046e-04", + "loss": 0.6053, + "slid_loss": 0.5835, + "step": 4223, + "time": 13.47 + }, + { + "epoch": 4.06, + "learning_rate": "1.1044e-04", + "loss": 0.5921, + "slid_loss": 0.5838, + "step": 4224, + "time": 12.83 + }, + { + "epoch": 4.06, + 
"learning_rate": "1.1042e-04", + "loss": 0.577, + "slid_loss": 0.5844, + "step": 4225, + "time": 13.0 + }, + { + "epoch": 4.06, + "learning_rate": "1.1040e-04", + "loss": 0.5671, + "slid_loss": 0.5841, + "step": 4226, + "time": 12.25 + }, + { + "epoch": 4.06, + "learning_rate": "1.1038e-04", + "loss": 0.7054, + "slid_loss": 0.5847, + "step": 4227, + "time": 12.8 + }, + { + "epoch": 4.06, + "learning_rate": "1.1036e-04", + "loss": 0.5666, + "slid_loss": 0.5847, + "step": 4228, + "time": 13.53 + }, + { + "epoch": 4.06, + "learning_rate": "1.1034e-04", + "loss": 0.5965, + "slid_loss": 0.5846, + "step": 4229, + "time": 13.51 + }, + { + "epoch": 4.06, + "learning_rate": "1.1032e-04", + "loss": 0.5912, + "slid_loss": 0.5847, + "step": 4230, + "time": 11.79 + }, + { + "epoch": 4.06, + "learning_rate": "1.1029e-04", + "loss": 0.5425, + "slid_loss": 0.5841, + "step": 4231, + "time": 14.0 + }, + { + "epoch": 4.07, + "learning_rate": "1.1027e-04", + "loss": 0.5704, + "slid_loss": 0.584, + "step": 4232, + "time": 13.33 + }, + { + "epoch": 4.07, + "learning_rate": "1.1025e-04", + "loss": 0.5127, + "slid_loss": 0.5827, + "step": 4233, + "time": 12.89 + }, + { + "epoch": 4.07, + "learning_rate": "1.1023e-04", + "loss": 0.5477, + "slid_loss": 0.5834, + "step": 4234, + "time": 12.31 + }, + { + "epoch": 4.07, + "learning_rate": "1.1021e-04", + "loss": 0.5216, + "slid_loss": 0.5826, + "step": 4235, + "time": 13.17 + }, + { + "epoch": 4.07, + "learning_rate": "1.1019e-04", + "loss": 0.6391, + "slid_loss": 0.5834, + "step": 4236, + "time": 13.63 + }, + { + "epoch": 4.07, + "learning_rate": "1.1017e-04", + "loss": 0.5674, + "slid_loss": 0.5831, + "step": 4237, + "time": 13.37 + }, + { + "epoch": 4.07, + "learning_rate": "1.1015e-04", + "loss": 0.5978, + "slid_loss": 0.5835, + "step": 4238, + "time": 13.64 + }, + { + "epoch": 4.07, + "learning_rate": "1.1013e-04", + "loss": 0.4992, + "slid_loss": 0.5829, + "step": 4239, + "time": 13.3 + }, + { + "epoch": 4.07, + "learning_rate": 
"1.1011e-04", + "loss": 0.5865, + "slid_loss": 0.5833, + "step": 4240, + "time": 12.76 + }, + { + "epoch": 4.07, + "learning_rate": "1.1009e-04", + "loss": 0.5741, + "slid_loss": 0.5823, + "step": 4241, + "time": 13.11 + }, + { + "epoch": 4.07, + "learning_rate": "1.1007e-04", + "loss": 0.5772, + "slid_loss": 0.5822, + "step": 4242, + "time": 13.45 + }, + { + "epoch": 4.08, + "learning_rate": "1.1005e-04", + "loss": 0.5783, + "slid_loss": 0.5818, + "step": 4243, + "time": 12.16 + }, + { + "epoch": 4.08, + "learning_rate": "1.1003e-04", + "loss": 0.5315, + "slid_loss": 0.5814, + "step": 4244, + "time": 13.41 + }, + { + "epoch": 4.08, + "learning_rate": "1.1001e-04", + "loss": 0.5487, + "slid_loss": 0.5815, + "step": 4245, + "time": 13.33 + }, + { + "epoch": 4.08, + "learning_rate": "1.0999e-04", + "loss": 0.5647, + "slid_loss": 0.5819, + "step": 4246, + "time": 12.77 + }, + { + "epoch": 4.08, + "learning_rate": "1.0997e-04", + "loss": 0.6098, + "slid_loss": 0.5828, + "step": 4247, + "time": 13.82 + }, + { + "epoch": 4.08, + "learning_rate": "1.0995e-04", + "loss": 0.5029, + "slid_loss": 0.5822, + "step": 4248, + "time": 13.6 + }, + { + "epoch": 4.08, + "learning_rate": "1.0993e-04", + "loss": 0.4987, + "slid_loss": 0.5821, + "step": 4249, + "time": 13.1 + }, + { + "epoch": 4.08, + "learning_rate": "1.0991e-04", + "loss": 0.5329, + "slid_loss": 0.582, + "step": 4250, + "time": 13.71 + }, + { + "epoch": 4.08, + "learning_rate": "1.0989e-04", + "loss": 0.6148, + "slid_loss": 0.5828, + "step": 4251, + "time": 11.78 + }, + { + "epoch": 4.08, + "learning_rate": "1.0987e-04", + "loss": 0.5762, + "slid_loss": 0.5829, + "step": 4252, + "time": 12.88 + }, + { + "epoch": 4.09, + "learning_rate": "1.0985e-04", + "loss": 0.6973, + "slid_loss": 0.584, + "step": 4253, + "time": 12.18 + }, + { + "epoch": 4.09, + "learning_rate": "1.0983e-04", + "loss": 0.5932, + "slid_loss": 0.5841, + "step": 4254, + "time": 12.92 + }, + { + "epoch": 4.09, + "learning_rate": "1.0981e-04", + "loss": 
0.5841, + "slid_loss": 0.5836, + "step": 4255, + "time": 11.55 + }, + { + "epoch": 4.09, + "learning_rate": "1.0979e-04", + "loss": 0.5875, + "slid_loss": 0.5835, + "step": 4256, + "time": 14.34 + }, + { + "epoch": 4.09, + "learning_rate": "1.0977e-04", + "loss": 0.5127, + "slid_loss": 0.5811, + "step": 4257, + "time": 12.13 + }, + { + "epoch": 4.09, + "learning_rate": "1.0975e-04", + "loss": 0.6067, + "slid_loss": 0.5801, + "step": 4258, + "time": 12.19 + }, + { + "epoch": 4.09, + "learning_rate": "1.0973e-04", + "loss": 0.6113, + "slid_loss": 0.5808, + "step": 4259, + "time": 12.92 + }, + { + "epoch": 4.09, + "learning_rate": "1.0971e-04", + "loss": 0.5244, + "slid_loss": 0.5811, + "step": 4260, + "time": 13.86 + }, + { + "epoch": 4.09, + "learning_rate": "1.0969e-04", + "loss": 0.5904, + "slid_loss": 0.5815, + "step": 4261, + "time": 12.14 + }, + { + "epoch": 4.09, + "learning_rate": "1.0967e-04", + "loss": 0.5329, + "slid_loss": 0.5803, + "step": 4262, + "time": 12.02 + }, + { + "epoch": 4.1, + "learning_rate": "1.0965e-04", + "loss": 0.5185, + "slid_loss": 0.58, + "step": 4263, + "time": 12.79 + }, + { + "epoch": 4.1, + "learning_rate": "1.0963e-04", + "loss": 0.5831, + "slid_loss": 0.5798, + "step": 4264, + "time": 12.68 + }, + { + "epoch": 4.1, + "learning_rate": "1.0961e-04", + "loss": 0.5788, + "slid_loss": 0.58, + "step": 4265, + "time": 13.25 + }, + { + "epoch": 4.1, + "learning_rate": "1.0959e-04", + "loss": 0.5965, + "slid_loss": 0.5805, + "step": 4266, + "time": 13.48 + }, + { + "epoch": 4.1, + "learning_rate": "1.0957e-04", + "loss": 0.5964, + "slid_loss": 0.5803, + "step": 4267, + "time": 13.55 + }, + { + "epoch": 4.1, + "learning_rate": "1.0955e-04", + "loss": 0.5525, + "slid_loss": 0.5794, + "step": 4268, + "time": 13.72 + }, + { + "epoch": 4.1, + "learning_rate": "1.0953e-04", + "loss": 0.614, + "slid_loss": 0.5793, + "step": 4269, + "time": 11.24 + }, + { + "epoch": 4.1, + "learning_rate": "1.0951e-04", + "loss": 0.5847, + "slid_loss": 0.5797, + 
"step": 4270, + "time": 13.64 + }, + { + "epoch": 4.1, + "learning_rate": "1.0949e-04", + "loss": 0.5628, + "slid_loss": 0.5792, + "step": 4271, + "time": 13.73 + }, + { + "epoch": 4.1, + "learning_rate": "1.0947e-04", + "loss": 0.577, + "slid_loss": 0.5788, + "step": 4272, + "time": 12.8 + }, + { + "epoch": 4.1, + "learning_rate": "1.0945e-04", + "loss": 0.5564, + "slid_loss": 0.5785, + "step": 4273, + "time": 13.42 + }, + { + "epoch": 4.11, + "learning_rate": "1.0944e-04", + "loss": 0.6607, + "slid_loss": 0.5792, + "step": 4274, + "time": 13.57 + }, + { + "epoch": 4.11, + "learning_rate": "1.0942e-04", + "loss": 0.5974, + "slid_loss": 0.5792, + "step": 4275, + "time": 14.31 + }, + { + "epoch": 4.11, + "learning_rate": "1.0940e-04", + "loss": 0.5566, + "slid_loss": 0.5787, + "step": 4276, + "time": 12.58 + }, + { + "epoch": 4.11, + "learning_rate": "1.0938e-04", + "loss": 0.6618, + "slid_loss": 0.5796, + "step": 4277, + "time": 13.56 + }, + { + "epoch": 4.11, + "learning_rate": "1.0936e-04", + "loss": 0.6143, + "slid_loss": 0.58, + "step": 4278, + "time": 13.87 + }, + { + "epoch": 4.11, + "learning_rate": "1.0934e-04", + "loss": 0.4348, + "slid_loss": 0.5788, + "step": 4279, + "time": 12.43 + }, + { + "epoch": 4.11, + "learning_rate": "1.0932e-04", + "loss": 0.5726, + "slid_loss": 0.5787, + "step": 4280, + "time": 13.34 + }, + { + "epoch": 4.11, + "learning_rate": "1.0930e-04", + "loss": 0.6186, + "slid_loss": 0.5793, + "step": 4281, + "time": 13.18 + }, + { + "epoch": 4.11, + "learning_rate": "1.0928e-04", + "loss": 0.5011, + "slid_loss": 0.5787, + "step": 4282, + "time": 13.27 + }, + { + "epoch": 4.11, + "learning_rate": "1.0926e-04", + "loss": 0.5969, + "slid_loss": 0.5794, + "step": 4283, + "time": 13.98 + }, + { + "epoch": 4.12, + "learning_rate": "1.0924e-04", + "loss": 0.487, + "slid_loss": 0.5785, + "step": 4284, + "time": 12.73 + }, + { + "epoch": 4.12, + "learning_rate": "1.0922e-04", + "loss": 0.6923, + "slid_loss": 0.5794, + "step": 4285, + "time": 
13.97 + }, + { + "epoch": 4.12, + "learning_rate": "1.0920e-04", + "loss": 0.5766, + "slid_loss": 0.5803, + "step": 4286, + "time": 13.13 + }, + { + "epoch": 4.12, + "learning_rate": "1.0918e-04", + "loss": 0.6397, + "slid_loss": 0.5817, + "step": 4287, + "time": 12.09 + }, + { + "epoch": 4.12, + "learning_rate": "1.0916e-04", + "loss": 0.5767, + "slid_loss": 0.5821, + "step": 4288, + "time": 11.9 + }, + { + "epoch": 4.12, + "learning_rate": "1.0914e-04", + "loss": 0.5242, + "slid_loss": 0.5809, + "step": 4289, + "time": 13.6 + }, + { + "epoch": 4.12, + "learning_rate": "1.0912e-04", + "loss": 0.5225, + "slid_loss": 0.5807, + "step": 4290, + "time": 13.06 + }, + { + "epoch": 4.12, + "learning_rate": "1.0910e-04", + "loss": 0.6269, + "slid_loss": 0.5812, + "step": 4291, + "time": 13.24 + }, + { + "epoch": 4.12, + "learning_rate": "1.0909e-04", + "loss": 0.539, + "slid_loss": 0.5795, + "step": 4292, + "time": 13.74 + }, + { + "epoch": 4.12, + "learning_rate": "1.0907e-04", + "loss": 0.5867, + "slid_loss": 0.5789, + "step": 4293, + "time": 13.26 + }, + { + "epoch": 4.12, + "learning_rate": "1.0905e-04", + "loss": 0.6056, + "slid_loss": 0.5793, + "step": 4294, + "time": 13.77 + }, + { + "epoch": 4.13, + "learning_rate": "1.0903e-04", + "loss": 0.4925, + "slid_loss": 0.5786, + "step": 4295, + "time": 13.63 + }, + { + "epoch": 4.13, + "learning_rate": "1.0901e-04", + "loss": 0.6271, + "slid_loss": 0.5792, + "step": 4296, + "time": 13.14 + }, + { + "epoch": 4.13, + "learning_rate": "1.0899e-04", + "loss": 0.5587, + "slid_loss": 0.5789, + "step": 4297, + "time": 13.63 + }, + { + "epoch": 4.13, + "learning_rate": "1.0897e-04", + "loss": 0.5397, + "slid_loss": 0.5787, + "step": 4298, + "time": 10.81 + }, + { + "epoch": 4.13, + "learning_rate": "1.0895e-04", + "loss": 0.5799, + "slid_loss": 0.5789, + "step": 4299, + "time": 12.0 + }, + { + "epoch": 4.13, + "learning_rate": "1.0893e-04", + "loss": 0.6091, + "slid_loss": 0.579, + "step": 4300, + "time": 13.59 + }, + { + 
"epoch": 4.13, + "learning_rate": "1.0891e-04", + "loss": 0.5922, + "slid_loss": 0.5794, + "step": 4301, + "time": 10.99 + }, + { + "epoch": 4.13, + "learning_rate": "1.0889e-04", + "loss": 0.5908, + "slid_loss": 0.5797, + "step": 4302, + "time": 13.21 + }, + { + "epoch": 4.13, + "learning_rate": "1.0887e-04", + "loss": 0.5425, + "slid_loss": 0.5779, + "step": 4303, + "time": 13.25 + }, + { + "epoch": 4.13, + "learning_rate": "1.0886e-04", + "loss": 0.5668, + "slid_loss": 0.5782, + "step": 4304, + "time": 13.35 + }, + { + "epoch": 4.14, + "learning_rate": "1.0884e-04", + "loss": 0.568, + "slid_loss": 0.5789, + "step": 4305, + "time": 11.64 + }, + { + "epoch": 4.14, + "learning_rate": "1.0882e-04", + "loss": 0.4701, + "slid_loss": 0.577, + "step": 4306, + "time": 13.92 + }, + { + "epoch": 4.14, + "learning_rate": "1.0880e-04", + "loss": 0.5937, + "slid_loss": 0.5765, + "step": 4307, + "time": 12.89 + }, + { + "epoch": 4.14, + "learning_rate": "1.0878e-04", + "loss": 0.5998, + "slid_loss": 0.5769, + "step": 4308, + "time": 13.27 + }, + { + "epoch": 4.14, + "learning_rate": "1.0876e-04", + "loss": 0.5799, + "slid_loss": 0.5767, + "step": 4309, + "time": 13.33 + }, + { + "epoch": 4.14, + "learning_rate": "1.0874e-04", + "loss": 0.6229, + "slid_loss": 0.5768, + "step": 4310, + "time": 12.13 + }, + { + "epoch": 4.14, + "learning_rate": "1.0872e-04", + "loss": 0.5331, + "slid_loss": 0.5759, + "step": 4311, + "time": 13.35 + }, + { + "epoch": 4.14, + "learning_rate": "1.0870e-04", + "loss": 0.5623, + "slid_loss": 0.5765, + "step": 4312, + "time": 13.81 + }, + { + "epoch": 4.14, + "learning_rate": "1.0868e-04", + "loss": 0.6547, + "slid_loss": 0.5774, + "step": 4313, + "time": 13.0 + }, + { + "epoch": 4.14, + "learning_rate": "1.0867e-04", + "loss": 0.4918, + "slid_loss": 0.5766, + "step": 4314, + "time": 13.29 + }, + { + "epoch": 4.15, + "learning_rate": "1.0865e-04", + "loss": 0.55, + "slid_loss": 0.5755, + "step": 4315, + "time": 12.82 + }, + { + "epoch": 4.15, + 
"learning_rate": "1.0863e-04", + "loss": 0.5527, + "slid_loss": 0.5749, + "step": 4316, + "time": 12.77 + }, + { + "epoch": 4.15, + "learning_rate": "1.0861e-04", + "loss": 0.5673, + "slid_loss": 0.575, + "step": 4317, + "time": 12.3 + }, + { + "epoch": 4.15, + "learning_rate": "1.0859e-04", + "loss": 0.515, + "slid_loss": 0.5737, + "step": 4318, + "time": 13.21 + }, + { + "epoch": 4.15, + "learning_rate": "1.0857e-04", + "loss": 0.4943, + "slid_loss": 0.5723, + "step": 4319, + "time": 11.26 + }, + { + "epoch": 4.15, + "learning_rate": "1.0855e-04", + "loss": 0.5458, + "slid_loss": 0.572, + "step": 4320, + "time": 14.21 + }, + { + "epoch": 4.15, + "learning_rate": "1.0853e-04", + "loss": 0.559, + "slid_loss": 0.5711, + "step": 4321, + "time": 12.84 + }, + { + "epoch": 4.15, + "learning_rate": "1.0852e-04", + "loss": 0.5555, + "slid_loss": 0.5716, + "step": 4322, + "time": 12.03 + }, + { + "epoch": 4.15, + "learning_rate": "1.0850e-04", + "loss": 0.6548, + "slid_loss": 0.5721, + "step": 4323, + "time": 12.87 + }, + { + "epoch": 4.15, + "learning_rate": "1.0848e-04", + "loss": 0.4743, + "slid_loss": 0.571, + "step": 4324, + "time": 12.93 + }, + { + "epoch": 4.15, + "learning_rate": "1.0846e-04", + "loss": 0.6153, + "slid_loss": 0.5713, + "step": 4325, + "time": 13.48 + }, + { + "epoch": 4.16, + "learning_rate": "1.0844e-04", + "loss": 0.5441, + "slid_loss": 0.5711, + "step": 4326, + "time": 13.22 + }, + { + "epoch": 4.16, + "learning_rate": "1.0842e-04", + "loss": 0.5518, + "slid_loss": 0.5696, + "step": 4327, + "time": 13.98 + }, + { + "epoch": 4.16, + "learning_rate": "1.0840e-04", + "loss": 0.5863, + "slid_loss": 0.5698, + "step": 4328, + "time": 12.73 + }, + { + "epoch": 4.16, + "learning_rate": "1.0838e-04", + "loss": 0.5523, + "slid_loss": 0.5693, + "step": 4329, + "time": 13.67 + }, + { + "epoch": 4.16, + "learning_rate": "1.0837e-04", + "loss": 0.5799, + "slid_loss": 0.5692, + "step": 4330, + "time": 12.98 + }, + { + "epoch": 4.16, + "learning_rate": 
"1.0835e-04", + "loss": 0.564, + "slid_loss": 0.5694, + "step": 4331, + "time": 13.47 + }, + { + "epoch": 4.16, + "learning_rate": "1.0833e-04", + "loss": 0.652, + "slid_loss": 0.5702, + "step": 4332, + "time": 12.47 + }, + { + "epoch": 4.16, + "learning_rate": "1.0831e-04", + "loss": 0.6051, + "slid_loss": 0.5712, + "step": 4333, + "time": 13.59 + }, + { + "epoch": 4.16, + "learning_rate": "1.0829e-04", + "loss": 0.5991, + "slid_loss": 0.5717, + "step": 4334, + "time": 10.95 + }, + { + "epoch": 4.16, + "learning_rate": "1.0827e-04", + "loss": 0.5713, + "slid_loss": 0.5722, + "step": 4335, + "time": 12.79 + }, + { + "epoch": 4.17, + "learning_rate": "1.0825e-04", + "loss": 0.6023, + "slid_loss": 0.5718, + "step": 4336, + "time": 13.93 + }, + { + "epoch": 4.17, + "learning_rate": "1.0824e-04", + "loss": 0.6313, + "slid_loss": 0.5725, + "step": 4337, + "time": 13.2 + }, + { + "epoch": 4.17, + "learning_rate": "1.0822e-04", + "loss": 0.5566, + "slid_loss": 0.572, + "step": 4338, + "time": 11.56 + }, + { + "epoch": 4.17, + "learning_rate": "1.0820e-04", + "loss": 0.536, + "slid_loss": 0.5724, + "step": 4339, + "time": 13.89 + }, + { + "epoch": 4.17, + "learning_rate": "1.0818e-04", + "loss": 0.6758, + "slid_loss": 0.5733, + "step": 4340, + "time": 12.92 + }, + { + "epoch": 4.17, + "learning_rate": "1.0816e-04", + "loss": 0.58, + "slid_loss": 0.5734, + "step": 4341, + "time": 13.47 + }, + { + "epoch": 4.17, + "learning_rate": "1.0814e-04", + "loss": 0.6013, + "slid_loss": 0.5736, + "step": 4342, + "time": 13.41 + }, + { + "epoch": 4.17, + "learning_rate": "1.0813e-04", + "loss": 0.4509, + "slid_loss": 0.5723, + "step": 4343, + "time": 13.96 + }, + { + "epoch": 4.17, + "learning_rate": "1.0811e-04", + "loss": 0.5583, + "slid_loss": 0.5726, + "step": 4344, + "time": 11.31 + }, + { + "epoch": 4.17, + "learning_rate": "1.0809e-04", + "loss": 0.5654, + "slid_loss": 0.5728, + "step": 4345, + "time": 14.76 + }, + { + "epoch": 4.17, + "learning_rate": "1.0807e-04", + "loss": 
0.6198, + "slid_loss": 0.5733, + "step": 4346, + "time": 12.85 + }, + { + "epoch": 4.18, + "learning_rate": "1.0805e-04", + "loss": 0.5075, + "slid_loss": 0.5723, + "step": 4347, + "time": 13.4 + }, + { + "epoch": 4.18, + "learning_rate": "1.0803e-04", + "loss": 0.5373, + "slid_loss": 0.5726, + "step": 4348, + "time": 12.27 + }, + { + "epoch": 4.18, + "learning_rate": "1.0802e-04", + "loss": 0.5579, + "slid_loss": 0.5732, + "step": 4349, + "time": 13.48 + }, + { + "epoch": 4.18, + "learning_rate": "1.0800e-04", + "loss": 0.5451, + "slid_loss": 0.5733, + "step": 4350, + "time": 13.55 + }, + { + "epoch": 4.18, + "learning_rate": "1.0798e-04", + "loss": 0.5267, + "slid_loss": 0.5725, + "step": 4351, + "time": 14.21 + }, + { + "epoch": 4.18, + "learning_rate": "1.0796e-04", + "loss": 0.4838, + "slid_loss": 0.5715, + "step": 4352, + "time": 14.48 + }, + { + "epoch": 4.18, + "learning_rate": "1.0794e-04", + "loss": 0.6804, + "slid_loss": 0.5714, + "step": 4353, + "time": 14.35 + }, + { + "epoch": 4.18, + "learning_rate": "1.0793e-04", + "loss": 0.5933, + "slid_loss": 0.5714, + "step": 4354, + "time": 13.36 + }, + { + "epoch": 4.18, + "learning_rate": "1.0791e-04", + "loss": 0.5706, + "slid_loss": 0.5712, + "step": 4355, + "time": 13.21 + }, + { + "epoch": 4.18, + "learning_rate": "1.0789e-04", + "loss": 0.6045, + "slid_loss": 0.5714, + "step": 4356, + "time": 14.15 + }, + { + "epoch": 4.19, + "learning_rate": "1.0787e-04", + "loss": 0.5503, + "slid_loss": 0.5718, + "step": 4357, + "time": 13.2 + }, + { + "epoch": 4.19, + "learning_rate": "1.0785e-04", + "loss": 0.6375, + "slid_loss": 0.5721, + "step": 4358, + "time": 12.97 + }, + { + "epoch": 4.19, + "learning_rate": "1.0784e-04", + "loss": 0.4895, + "slid_loss": 0.5709, + "step": 4359, + "time": 12.8 + }, + { + "epoch": 4.19, + "learning_rate": "1.0782e-04", + "loss": 0.5802, + "slid_loss": 0.5714, + "step": 4360, + "time": 12.95 + }, + { + "epoch": 4.19, + "learning_rate": "1.0780e-04", + "loss": 0.6682, + "slid_loss": 
0.5722, + "step": 4361, + "time": 12.8 + }, + { + "epoch": 4.19, + "learning_rate": "1.0778e-04", + "loss": 0.552, + "slid_loss": 0.5724, + "step": 4362, + "time": 12.46 + }, + { + "epoch": 4.19, + "learning_rate": "1.0776e-04", + "loss": 0.534, + "slid_loss": 0.5726, + "step": 4363, + "time": 13.78 + }, + { + "epoch": 4.19, + "learning_rate": "1.0775e-04", + "loss": 0.619, + "slid_loss": 0.5729, + "step": 4364, + "time": 12.87 + }, + { + "epoch": 4.19, + "learning_rate": "1.0773e-04", + "loss": 0.5705, + "slid_loss": 0.5728, + "step": 4365, + "time": 14.74 + }, + { + "epoch": 4.19, + "learning_rate": "1.0771e-04", + "loss": 0.5997, + "slid_loss": 0.5729, + "step": 4366, + "time": 12.9 + }, + { + "epoch": 4.2, + "learning_rate": "1.0769e-04", + "loss": 0.5418, + "slid_loss": 0.5723, + "step": 4367, + "time": 11.29 + }, + { + "epoch": 4.2, + "learning_rate": "1.0767e-04", + "loss": 0.5783, + "slid_loss": 0.5726, + "step": 4368, + "time": 12.84 + }, + { + "epoch": 4.2, + "learning_rate": "1.0766e-04", + "loss": 0.5343, + "slid_loss": 0.5718, + "step": 4369, + "time": 12.75 + }, + { + "epoch": 4.2, + "learning_rate": "1.0764e-04", + "loss": 0.5112, + "slid_loss": 0.5711, + "step": 4370, + "time": 13.89 + }, + { + "epoch": 4.2, + "learning_rate": "1.0762e-04", + "loss": 0.5011, + "slid_loss": 0.5704, + "step": 4371, + "time": 12.78 + }, + { + "epoch": 4.2, + "learning_rate": "1.0760e-04", + "loss": 0.6308, + "slid_loss": 0.571, + "step": 4372, + "time": 13.2 + }, + { + "epoch": 4.2, + "learning_rate": "1.0758e-04", + "loss": 0.541, + "slid_loss": 0.5708, + "step": 4373, + "time": 13.99 + }, + { + "epoch": 4.2, + "learning_rate": "1.0757e-04", + "loss": 0.5747, + "slid_loss": 0.57, + "step": 4374, + "time": 13.53 + }, + { + "epoch": 4.2, + "learning_rate": "1.0755e-04", + "loss": 0.5601, + "slid_loss": 0.5696, + "step": 4375, + "time": 13.21 + }, + { + "epoch": 4.2, + "learning_rate": "1.0753e-04", + "loss": 0.5634, + "slid_loss": 0.5697, + "step": 4376, + "time": 12.84 
+ }, + { + "epoch": 4.2, + "learning_rate": "1.0751e-04", + "loss": 0.4931, + "slid_loss": 0.568, + "step": 4377, + "time": 11.92 + }, + { + "epoch": 4.21, + "learning_rate": "1.0750e-04", + "loss": 0.6246, + "slid_loss": 0.5681, + "step": 4378, + "time": 13.85 + }, + { + "epoch": 4.21, + "learning_rate": "1.0748e-04", + "loss": 0.5722, + "slid_loss": 0.5694, + "step": 4379, + "time": 12.86 + }, + { + "epoch": 4.21, + "learning_rate": "1.0746e-04", + "loss": 0.5502, + "slid_loss": 0.5692, + "step": 4380, + "time": 13.3 + }, + { + "epoch": 4.21, + "learning_rate": "1.0744e-04", + "loss": 0.5715, + "slid_loss": 0.5687, + "step": 4381, + "time": 11.64 + }, + { + "epoch": 4.21, + "learning_rate": "1.0743e-04", + "loss": 0.5815, + "slid_loss": 0.5696, + "step": 4382, + "time": 12.74 + }, + { + "epoch": 4.21, + "learning_rate": "1.0741e-04", + "loss": 0.7127, + "slid_loss": 0.5707, + "step": 4383, + "time": 13.06 + }, + { + "epoch": 4.21, + "learning_rate": "1.0739e-04", + "loss": 0.6869, + "slid_loss": 0.5727, + "step": 4384, + "time": 14.21 + }, + { + "epoch": 4.21, + "learning_rate": "1.0737e-04", + "loss": 0.6397, + "slid_loss": 0.5722, + "step": 4385, + "time": 13.11 + }, + { + "epoch": 4.21, + "learning_rate": "1.0736e-04", + "loss": 0.5562, + "slid_loss": 0.572, + "step": 4386, + "time": 14.08 + }, + { + "epoch": 4.21, + "learning_rate": "1.0734e-04", + "loss": 0.5471, + "slid_loss": 0.5711, + "step": 4387, + "time": 13.17 + }, + { + "epoch": 4.22, + "learning_rate": "1.0732e-04", + "loss": 0.5869, + "slid_loss": 0.5712, + "step": 4388, + "time": 13.48 + }, + { + "epoch": 4.22, + "learning_rate": "1.0730e-04", + "loss": 0.5595, + "slid_loss": 0.5715, + "step": 4389, + "time": 14.19 + }, + { + "epoch": 4.22, + "learning_rate": "1.0729e-04", + "loss": 0.5296, + "slid_loss": 0.5716, + "step": 4390, + "time": 13.65 + }, + { + "epoch": 4.22, + "learning_rate": "1.0727e-04", + "loss": 0.4787, + "slid_loss": 0.5701, + "step": 4391, + "time": 12.15 + }, + { + "epoch": 
4.22, + "learning_rate": "1.0725e-04", + "loss": 0.5321, + "slid_loss": 0.57, + "step": 4392, + "time": 12.07 + }, + { + "epoch": 4.22, + "learning_rate": "1.0723e-04", + "loss": 0.5056, + "slid_loss": 0.5692, + "step": 4393, + "time": 13.65 + }, + { + "epoch": 4.22, + "learning_rate": "1.0722e-04", + "loss": 0.5397, + "slid_loss": 0.5686, + "step": 4394, + "time": 13.64 + }, + { + "epoch": 4.22, + "learning_rate": "1.0720e-04", + "loss": 0.553, + "slid_loss": 0.5692, + "step": 4395, + "time": 13.83 + }, + { + "epoch": 4.22, + "learning_rate": "1.0718e-04", + "loss": 0.6374, + "slid_loss": 0.5693, + "step": 4396, + "time": 13.52 + }, + { + "epoch": 4.22, + "learning_rate": "1.0716e-04", + "loss": 0.4997, + "slid_loss": 0.5687, + "step": 4397, + "time": 11.85 + }, + { + "epoch": 4.22, + "learning_rate": "1.0715e-04", + "loss": 0.6042, + "slid_loss": 0.5693, + "step": 4398, + "time": 13.94 + }, + { + "epoch": 4.23, + "learning_rate": "1.0713e-04", + "loss": 0.4681, + "slid_loss": 0.5682, + "step": 4399, + "time": 13.07 + }, + { + "epoch": 4.23, + "learning_rate": "1.0711e-04", + "loss": 0.5529, + "slid_loss": 0.5676, + "step": 4400, + "time": 12.82 + }, + { + "epoch": 4.23, + "learning_rate": "1.0710e-04", + "loss": 0.5447, + "slid_loss": 0.5672, + "step": 4401, + "time": 13.32 + }, + { + "epoch": 4.23, + "learning_rate": "1.0708e-04", + "loss": 0.5756, + "slid_loss": 0.567, + "step": 4402, + "time": 13.92 + }, + { + "epoch": 4.23, + "learning_rate": "1.0706e-04", + "loss": 0.5784, + "slid_loss": 0.5674, + "step": 4403, + "time": 13.82 + }, + { + "epoch": 4.23, + "learning_rate": "1.0704e-04", + "loss": 0.5788, + "slid_loss": 0.5675, + "step": 4404, + "time": 12.69 + }, + { + "epoch": 4.23, + "learning_rate": "1.0703e-04", + "loss": 0.5963, + "slid_loss": 0.5678, + "step": 4405, + "time": 12.97 + }, + { + "epoch": 4.23, + "learning_rate": "1.0701e-04", + "loss": 0.5728, + "slid_loss": 0.5688, + "step": 4406, + "time": 13.77 + }, + { + "epoch": 4.23, + 
"learning_rate": "1.0699e-04", + "loss": 0.5906, + "slid_loss": 0.5688, + "step": 4407, + "time": 13.39 + }, + { + "epoch": 4.23, + "learning_rate": "1.0698e-04", + "loss": 0.5218, + "slid_loss": 0.568, + "step": 4408, + "time": 13.74 + }, + { + "epoch": 4.24, + "learning_rate": "1.0696e-04", + "loss": 0.5883, + "slid_loss": 0.5681, + "step": 4409, + "time": 13.06 + }, + { + "epoch": 4.24, + "learning_rate": "1.0694e-04", + "loss": 0.5248, + "slid_loss": 0.5671, + "step": 4410, + "time": 10.85 + }, + { + "epoch": 4.24, + "learning_rate": "1.0692e-04", + "loss": 0.5462, + "slid_loss": 0.5672, + "step": 4411, + "time": 12.77 + }, + { + "epoch": 4.24, + "learning_rate": "1.0691e-04", + "loss": 0.5649, + "slid_loss": 0.5673, + "step": 4412, + "time": 13.36 + }, + { + "epoch": 4.24, + "learning_rate": "1.0689e-04", + "loss": 0.59, + "slid_loss": 0.5666, + "step": 4413, + "time": 12.67 + }, + { + "epoch": 4.24, + "learning_rate": "1.0687e-04", + "loss": 0.5554, + "slid_loss": 0.5672, + "step": 4414, + "time": 13.92 + }, + { + "epoch": 4.24, + "learning_rate": "1.0686e-04", + "loss": 0.5741, + "slid_loss": 0.5675, + "step": 4415, + "time": 13.47 + }, + { + "epoch": 4.24, + "learning_rate": "1.0684e-04", + "loss": 0.5373, + "slid_loss": 0.5673, + "step": 4416, + "time": 11.59 + }, + { + "epoch": 4.24, + "learning_rate": "1.0682e-04", + "loss": 0.523, + "slid_loss": 0.5669, + "step": 4417, + "time": 11.37 + }, + { + "epoch": 4.24, + "learning_rate": "1.0681e-04", + "loss": 0.565, + "slid_loss": 0.5674, + "step": 4418, + "time": 11.39 + }, + { + "epoch": 4.24, + "learning_rate": "1.0679e-04", + "loss": 0.5234, + "slid_loss": 0.5677, + "step": 4419, + "time": 13.39 + }, + { + "epoch": 4.25, + "learning_rate": "1.0677e-04", + "loss": 0.5417, + "slid_loss": 0.5676, + "step": 4420, + "time": 10.87 + }, + { + "epoch": 4.25, + "learning_rate": "1.0675e-04", + "loss": 0.5772, + "slid_loss": 0.5678, + "step": 4421, + "time": 11.54 + }, + { + "epoch": 4.25, + "learning_rate": 
"1.0674e-04", + "loss": 0.5095, + "slid_loss": 0.5674, + "step": 4422, + "time": 13.79 + }, + { + "epoch": 4.25, + "learning_rate": "1.0672e-04", + "loss": 0.5318, + "slid_loss": 0.5661, + "step": 4423, + "time": 12.64 + }, + { + "epoch": 4.25, + "learning_rate": "1.0670e-04", + "loss": 0.5761, + "slid_loss": 0.5671, + "step": 4424, + "time": 13.66 + }, + { + "epoch": 4.25, + "learning_rate": "1.0669e-04", + "loss": 0.5633, + "slid_loss": 0.5666, + "step": 4425, + "time": 13.37 + }, + { + "epoch": 4.25, + "learning_rate": "1.0667e-04", + "loss": 0.5, + "slid_loss": 0.5662, + "step": 4426, + "time": 13.65 + }, + { + "epoch": 4.25, + "learning_rate": "1.0665e-04", + "loss": 0.5033, + "slid_loss": 0.5657, + "step": 4427, + "time": 13.82 + }, + { + "epoch": 4.25, + "learning_rate": "1.0664e-04", + "loss": 0.5214, + "slid_loss": 0.5651, + "step": 4428, + "time": 12.88 + }, + { + "epoch": 4.25, + "learning_rate": "1.0662e-04", + "loss": 0.5183, + "slid_loss": 0.5647, + "step": 4429, + "time": 13.27 + }, + { + "epoch": 4.26, + "learning_rate": "1.0660e-04", + "loss": 0.6519, + "slid_loss": 0.5654, + "step": 4430, + "time": 12.7 + }, + { + "epoch": 4.26, + "learning_rate": "1.0659e-04", + "loss": 0.6079, + "slid_loss": 0.5659, + "step": 4431, + "time": 12.77 + }, + { + "epoch": 4.26, + "learning_rate": "1.0657e-04", + "loss": 0.59, + "slid_loss": 0.5653, + "step": 4432, + "time": 13.91 + }, + { + "epoch": 4.26, + "learning_rate": "1.0655e-04", + "loss": 0.6267, + "slid_loss": 0.5655, + "step": 4433, + "time": 14.08 + }, + { + "epoch": 4.26, + "learning_rate": "1.0654e-04", + "loss": 0.5343, + "slid_loss": 0.5648, + "step": 4434, + "time": 13.32 + }, + { + "epoch": 4.26, + "learning_rate": "1.0652e-04", + "loss": 0.5887, + "slid_loss": 0.565, + "step": 4435, + "time": 12.92 + }, + { + "epoch": 4.26, + "learning_rate": "1.0650e-04", + "loss": 0.5544, + "slid_loss": 0.5645, + "step": 4436, + "time": 13.52 + }, + { + "epoch": 4.26, + "learning_rate": "1.0649e-04", + "loss": 
0.5707, + "slid_loss": 0.5639, + "step": 4437, + "time": 12.46 + }, + { + "epoch": 4.26, + "learning_rate": "1.0647e-04", + "loss": 0.6036, + "slid_loss": 0.5644, + "step": 4438, + "time": 13.51 + }, + { + "epoch": 4.26, + "learning_rate": "1.0645e-04", + "loss": 0.6265, + "slid_loss": 0.5653, + "step": 4439, + "time": 14.18 + }, + { + "epoch": 4.27, + "learning_rate": "1.0644e-04", + "loss": 0.5398, + "slid_loss": 0.5639, + "step": 4440, + "time": 12.97 + }, + { + "epoch": 4.27, + "learning_rate": "1.0642e-04", + "loss": 0.5426, + "slid_loss": 0.5635, + "step": 4441, + "time": 13.14 + }, + { + "epoch": 4.27, + "learning_rate": "1.0641e-04", + "loss": 0.4603, + "slid_loss": 0.5621, + "step": 4442, + "time": 13.33 + }, + { + "epoch": 4.27, + "learning_rate": "1.0639e-04", + "loss": 0.5677, + "slid_loss": 0.5633, + "step": 4443, + "time": 12.77 + }, + { + "epoch": 4.27, + "learning_rate": "1.0637e-04", + "loss": 0.507, + "slid_loss": 0.5628, + "step": 4444, + "time": 13.76 + }, + { + "epoch": 4.27, + "learning_rate": "1.0636e-04", + "loss": 0.5395, + "slid_loss": 0.5625, + "step": 4445, + "time": 13.72 + }, + { + "epoch": 4.27, + "learning_rate": "1.0634e-04", + "loss": 0.6298, + "slid_loss": 0.5626, + "step": 4446, + "time": 13.35 + }, + { + "epoch": 4.27, + "learning_rate": "1.0632e-04", + "loss": 0.5373, + "slid_loss": 0.5629, + "step": 4447, + "time": 13.82 + }, + { + "epoch": 4.27, + "learning_rate": "1.0631e-04", + "loss": 0.5597, + "slid_loss": 0.5632, + "step": 4448, + "time": 12.44 + }, + { + "epoch": 4.27, + "learning_rate": "1.0629e-04", + "loss": 0.646, + "slid_loss": 0.564, + "step": 4449, + "time": 12.87 + }, + { + "epoch": 4.27, + "learning_rate": "1.0627e-04", + "loss": 0.5208, + "slid_loss": 0.5638, + "step": 4450, + "time": 12.86 + }, + { + "epoch": 4.28, + "learning_rate": "1.0626e-04", + "loss": 0.5568, + "slid_loss": 0.5641, + "step": 4451, + "time": 12.29 + }, + { + "epoch": 4.28, + "learning_rate": "1.0624e-04", + "loss": 0.5771, + "slid_loss": 
0.565, + "step": 4452, + "time": 12.08 + }, + { + "epoch": 4.28, + "learning_rate": "1.0623e-04", + "loss": 0.5579, + "slid_loss": 0.5638, + "step": 4453, + "time": 13.14 + }, + { + "epoch": 4.28, + "learning_rate": "1.0621e-04", + "loss": 0.5298, + "slid_loss": 0.5632, + "step": 4454, + "time": 14.23 + }, + { + "epoch": 4.28, + "learning_rate": "1.0619e-04", + "loss": 0.5334, + "slid_loss": 0.5628, + "step": 4455, + "time": 11.79 + }, + { + "epoch": 4.28, + "learning_rate": "1.0618e-04", + "loss": 0.6166, + "slid_loss": 0.5629, + "step": 4456, + "time": 12.52 + }, + { + "epoch": 4.28, + "learning_rate": "1.0616e-04", + "loss": 0.6536, + "slid_loss": 0.564, + "step": 4457, + "time": 13.02 + }, + { + "epoch": 4.28, + "learning_rate": "1.0615e-04", + "loss": 0.5476, + "slid_loss": 0.5631, + "step": 4458, + "time": 12.34 + }, + { + "epoch": 4.28, + "learning_rate": "1.0613e-04", + "loss": 0.4697, + "slid_loss": 0.5629, + "step": 4459, + "time": 13.13 + }, + { + "epoch": 4.28, + "learning_rate": "1.0611e-04", + "loss": 0.5542, + "slid_loss": 0.5626, + "step": 4460, + "time": 11.71 + }, + { + "epoch": 4.29, + "learning_rate": "1.0610e-04", + "loss": 0.4617, + "slid_loss": 0.5605, + "step": 4461, + "time": 13.2 + }, + { + "epoch": 4.29, + "learning_rate": "1.0608e-04", + "loss": 0.6238, + "slid_loss": 0.5612, + "step": 4462, + "time": 11.9 + }, + { + "epoch": 4.29, + "learning_rate": "1.0607e-04", + "loss": 0.5374, + "slid_loss": 0.5613, + "step": 4463, + "time": 13.44 + }, + { + "epoch": 4.29, + "learning_rate": "1.0605e-04", + "loss": 0.4013, + "slid_loss": 0.5591, + "step": 4464, + "time": 12.15 + }, + { + "epoch": 4.29, + "learning_rate": "1.0603e-04", + "loss": 0.4663, + "slid_loss": 0.5581, + "step": 4465, + "time": 13.56 + }, + { + "epoch": 4.29, + "learning_rate": "1.0602e-04", + "loss": 0.5516, + "slid_loss": 0.5576, + "step": 4466, + "time": 13.04 + }, + { + "epoch": 4.29, + "learning_rate": "1.0600e-04", + "loss": 0.5523, + "slid_loss": 0.5577, + "step": 4467, 
+ "time": 12.39 + }, + { + "epoch": 4.29, + "learning_rate": "1.0599e-04", + "loss": 0.5163, + "slid_loss": 0.5571, + "step": 4468, + "time": 12.54 + }, + { + "epoch": 4.29, + "learning_rate": "1.0597e-04", + "loss": 0.6144, + "slid_loss": 0.5579, + "step": 4469, + "time": 14.16 + }, + { + "epoch": 4.29, + "learning_rate": "1.0595e-04", + "loss": 0.6424, + "slid_loss": 0.5592, + "step": 4470, + "time": 14.28 + }, + { + "epoch": 4.29, + "learning_rate": "1.0594e-04", + "loss": 0.5203, + "slid_loss": 0.5594, + "step": 4471, + "time": 13.97 + }, + { + "epoch": 4.3, + "learning_rate": "1.0592e-04", + "loss": 0.5061, + "slid_loss": 0.5581, + "step": 4472, + "time": 13.68 + }, + { + "epoch": 4.3, + "learning_rate": "1.0591e-04", + "loss": 0.5977, + "slid_loss": 0.5587, + "step": 4473, + "time": 12.78 + }, + { + "epoch": 4.3, + "learning_rate": "1.0589e-04", + "loss": 0.5824, + "slid_loss": 0.5588, + "step": 4474, + "time": 13.47 + }, + { + "epoch": 4.3, + "learning_rate": "1.0587e-04", + "loss": 0.611, + "slid_loss": 0.5593, + "step": 4475, + "time": 12.87 + }, + { + "epoch": 4.3, + "learning_rate": "1.0586e-04", + "loss": 0.5908, + "slid_loss": 0.5596, + "step": 4476, + "time": 12.91 + }, + { + "epoch": 4.3, + "learning_rate": "1.0584e-04", + "loss": 0.558, + "slid_loss": 0.5602, + "step": 4477, + "time": 13.82 + }, + { + "epoch": 4.3, + "learning_rate": "1.0583e-04", + "loss": 0.6553, + "slid_loss": 0.5605, + "step": 4478, + "time": 13.75 + }, + { + "epoch": 4.3, + "learning_rate": "1.0581e-04", + "loss": 0.5998, + "slid_loss": 0.5608, + "step": 4479, + "time": 11.89 + }, + { + "epoch": 4.3, + "learning_rate": "1.0580e-04", + "loss": 0.4902, + "slid_loss": 0.5602, + "step": 4480, + "time": 12.78 + }, + { + "epoch": 4.3, + "learning_rate": "1.0578e-04", + "loss": 0.5863, + "slid_loss": 0.5603, + "step": 4481, + "time": 13.96 + }, + { + "epoch": 4.31, + "learning_rate": "1.0576e-04", + "loss": 0.5728, + "slid_loss": 0.5602, + "step": 4482, + "time": 11.09 + }, + { + 
"epoch": 4.31, + "learning_rate": "1.0575e-04", + "loss": 0.4727, + "slid_loss": 0.5578, + "step": 4483, + "time": 13.49 + }, + { + "epoch": 4.31, + "learning_rate": "1.0573e-04", + "loss": 0.5703, + "slid_loss": 0.5567, + "step": 4484, + "time": 14.15 + }, + { + "epoch": 4.31, + "learning_rate": "1.0572e-04", + "loss": 0.5005, + "slid_loss": 0.5553, + "step": 4485, + "time": 12.82 + }, + { + "epoch": 4.31, + "learning_rate": "1.0570e-04", + "loss": 0.5055, + "slid_loss": 0.5548, + "step": 4486, + "time": 13.54 + }, + { + "epoch": 4.31, + "learning_rate": "1.0569e-04", + "loss": 0.4682, + "slid_loss": 0.554, + "step": 4487, + "time": 13.31 + }, + { + "epoch": 4.31, + "learning_rate": "1.0567e-04", + "loss": 0.562, + "slid_loss": 0.5537, + "step": 4488, + "time": 13.75 + }, + { + "epoch": 4.31, + "learning_rate": "1.0566e-04", + "loss": 0.5304, + "slid_loss": 0.5534, + "step": 4489, + "time": 13.0 + }, + { + "epoch": 4.31, + "learning_rate": "1.0564e-04", + "loss": 0.5081, + "slid_loss": 0.5532, + "step": 4490, + "time": 13.04 + }, + { + "epoch": 4.31, + "learning_rate": "1.0562e-04", + "loss": 0.5458, + "slid_loss": 0.5539, + "step": 4491, + "time": 13.76 + }, + { + "epoch": 4.32, + "learning_rate": "1.0561e-04", + "loss": 0.5107, + "slid_loss": 0.5537, + "step": 4492, + "time": 13.26 + }, + { + "epoch": 4.32, + "learning_rate": "1.0559e-04", + "loss": 0.5098, + "slid_loss": 0.5537, + "step": 4493, + "time": 13.72 + }, + { + "epoch": 4.32, + "learning_rate": "1.0558e-04", + "loss": 0.6095, + "slid_loss": 0.5544, + "step": 4494, + "time": 12.45 + }, + { + "epoch": 4.32, + "learning_rate": "1.0556e-04", + "loss": 0.5212, + "slid_loss": 0.5541, + "step": 4495, + "time": 11.89 + }, + { + "epoch": 4.32, + "learning_rate": "1.0555e-04", + "loss": 0.5963, + "slid_loss": 0.5537, + "step": 4496, + "time": 13.42 + }, + { + "epoch": 4.32, + "learning_rate": "1.0553e-04", + "loss": 0.592, + "slid_loss": 0.5546, + "step": 4497, + "time": 13.27 + }, + { + "epoch": 4.32, + 
"learning_rate": "1.0552e-04", + "loss": 0.6565, + "slid_loss": 0.5551, + "step": 4498, + "time": 13.48 + }, + { + "epoch": 4.32, + "learning_rate": "1.0550e-04", + "loss": 0.5105, + "slid_loss": 0.5556, + "step": 4499, + "time": 11.91 + }, + { + "epoch": 4.32, + "learning_rate": "1.0549e-04", + "loss": 0.5513, + "slid_loss": 0.5556, + "step": 4500, + "time": 14.29 + }, + { + "epoch": 4.32, + "learning_rate": "1.0547e-04", + "loss": 0.5206, + "slid_loss": 0.5553, + "step": 4501, + "time": 13.26 + }, + { + "epoch": 4.32, + "learning_rate": "1.0546e-04", + "loss": 0.5941, + "slid_loss": 0.5555, + "step": 4502, + "time": 13.95 + }, + { + "epoch": 4.33, + "learning_rate": "1.0544e-04", + "loss": 0.5431, + "slid_loss": 0.5551, + "step": 4503, + "time": 13.57 + }, + { + "epoch": 4.33, + "learning_rate": "1.0543e-04", + "loss": 0.6153, + "slid_loss": 0.5555, + "step": 4504, + "time": 13.29 + }, + { + "epoch": 4.33, + "learning_rate": "1.0541e-04", + "loss": 0.5241, + "slid_loss": 0.5548, + "step": 4505, + "time": 13.8 + }, + { + "epoch": 4.33, + "learning_rate": "1.0539e-04", + "loss": 0.6102, + "slid_loss": 0.5552, + "step": 4506, + "time": 11.06 + }, + { + "epoch": 4.33, + "learning_rate": "1.0538e-04", + "loss": 0.4648, + "slid_loss": 0.5539, + "step": 4507, + "time": 13.62 + }, + { + "epoch": 4.33, + "learning_rate": "1.0536e-04", + "loss": 0.6178, + "slid_loss": 0.5549, + "step": 4508, + "time": 13.56 + }, + { + "epoch": 4.33, + "learning_rate": "1.0535e-04", + "loss": 0.4409, + "slid_loss": 0.5534, + "step": 4509, + "time": 13.16 + }, + { + "epoch": 4.33, + "learning_rate": "1.0533e-04", + "loss": 0.554, + "slid_loss": 0.5537, + "step": 4510, + "time": 13.76 + }, + { + "epoch": 4.33, + "learning_rate": "1.0532e-04", + "loss": 0.4906, + "slid_loss": 0.5531, + "step": 4511, + "time": 12.69 + }, + { + "epoch": 4.33, + "learning_rate": "1.0530e-04", + "loss": 0.5694, + "slid_loss": 0.5532, + "step": 4512, + "time": 13.43 + }, + { + "epoch": 4.34, + "learning_rate": 
"1.0529e-04", + "loss": 0.5759, + "slid_loss": 0.553, + "step": 4513, + "time": 13.41 + }, + { + "epoch": 4.34, + "learning_rate": "1.0527e-04", + "loss": 0.5423, + "slid_loss": 0.5529, + "step": 4514, + "time": 13.4 + }, + { + "epoch": 4.34, + "learning_rate": "1.0526e-04", + "loss": 0.6554, + "slid_loss": 0.5537, + "step": 4515, + "time": 13.45 + }, + { + "epoch": 4.34, + "learning_rate": "1.0524e-04", + "loss": 0.561, + "slid_loss": 0.5539, + "step": 4516, + "time": 12.76 + }, + { + "epoch": 4.34, + "learning_rate": "1.0523e-04", + "loss": 0.5369, + "slid_loss": 0.5541, + "step": 4517, + "time": 14.18 + }, + { + "epoch": 4.34, + "learning_rate": "1.0521e-04", + "loss": 0.5693, + "slid_loss": 0.5541, + "step": 4518, + "time": 12.57 + }, + { + "epoch": 4.34, + "learning_rate": "1.0520e-04", + "loss": 0.621, + "slid_loss": 0.5551, + "step": 4519, + "time": 11.52 + }, + { + "epoch": 4.34, + "learning_rate": "1.0518e-04", + "loss": 0.5806, + "slid_loss": 0.5555, + "step": 4520, + "time": 12.83 + }, + { + "epoch": 4.34, + "learning_rate": "1.0517e-04", + "loss": 0.5694, + "slid_loss": 0.5554, + "step": 4521, + "time": 12.95 + }, + { + "epoch": 4.34, + "learning_rate": "1.0516e-04", + "loss": 0.553, + "slid_loss": 0.5559, + "step": 4522, + "time": 13.3 + }, + { + "epoch": 4.34, + "learning_rate": "1.0514e-04", + "loss": 0.4836, + "slid_loss": 0.5554, + "step": 4523, + "time": 14.27 + }, + { + "epoch": 4.35, + "learning_rate": "1.0513e-04", + "loss": 0.649, + "slid_loss": 0.5561, + "step": 4524, + "time": 11.86 + }, + { + "epoch": 4.35, + "learning_rate": "1.0511e-04", + "loss": 0.5419, + "slid_loss": 0.5559, + "step": 4525, + "time": 13.86 + }, + { + "epoch": 4.35, + "learning_rate": "1.0510e-04", + "loss": 0.5648, + "slid_loss": 0.5565, + "step": 4526, + "time": 13.26 + }, + { + "epoch": 4.35, + "learning_rate": "1.0508e-04", + "loss": 0.527, + "slid_loss": 0.5568, + "step": 4527, + "time": 13.78 + }, + { + "epoch": 4.35, + "learning_rate": "1.0507e-04", + "loss": 
0.5219, + "slid_loss": 0.5568, + "step": 4528, + "time": 14.12 + }, + { + "epoch": 4.35, + "learning_rate": "1.0505e-04", + "loss": 0.673, + "slid_loss": 0.5583, + "step": 4529, + "time": 13.59 + }, + { + "epoch": 4.35, + "learning_rate": "1.0504e-04", + "loss": 0.5212, + "slid_loss": 0.557, + "step": 4530, + "time": 13.93 + }, + { + "epoch": 4.35, + "learning_rate": "1.0502e-04", + "loss": 0.5785, + "slid_loss": 0.5567, + "step": 4531, + "time": 12.87 + }, + { + "epoch": 4.35, + "learning_rate": "1.0501e-04", + "loss": 0.5516, + "slid_loss": 0.5563, + "step": 4532, + "time": 13.73 + }, + { + "epoch": 4.35, + "learning_rate": "1.0499e-04", + "loss": 0.5774, + "slid_loss": 0.5558, + "step": 4533, + "time": 13.38 + }, + { + "epoch": 4.36, + "learning_rate": "1.0498e-04", + "loss": 0.5418, + "slid_loss": 0.5559, + "step": 4534, + "time": 12.9 + }, + { + "epoch": 4.36, + "learning_rate": "1.0496e-04", + "loss": 0.5454, + "slid_loss": 0.5555, + "step": 4535, + "time": 13.45 + }, + { + "epoch": 4.36, + "learning_rate": "1.0495e-04", + "loss": 0.6359, + "slid_loss": 0.5563, + "step": 4536, + "time": 13.23 + }, + { + "epoch": 4.36, + "learning_rate": "1.0493e-04", + "loss": 0.5451, + "slid_loss": 0.556, + "step": 4537, + "time": 11.88 + }, + { + "epoch": 4.36, + "learning_rate": "1.0492e-04", + "loss": 0.5623, + "slid_loss": 0.5556, + "step": 4538, + "time": 13.51 + }, + { + "epoch": 4.36, + "learning_rate": "1.0491e-04", + "loss": 0.4879, + "slid_loss": 0.5542, + "step": 4539, + "time": 14.21 + }, + { + "epoch": 4.36, + "learning_rate": "1.0489e-04", + "loss": 0.4647, + "slid_loss": 0.5535, + "step": 4540, + "time": 10.9 + }, + { + "epoch": 4.36, + "learning_rate": "1.0488e-04", + "loss": 0.5222, + "slid_loss": 0.5533, + "step": 4541, + "time": 13.03 + }, + { + "epoch": 4.36, + "learning_rate": "1.0486e-04", + "loss": 0.6058, + "slid_loss": 0.5547, + "step": 4542, + "time": 11.9 + }, + { + "epoch": 4.36, + "learning_rate": "1.0485e-04", + "loss": 0.6018, + "slid_loss": 
0.5551, + "step": 4543, + "time": 13.03 + }, + { + "epoch": 4.37, + "learning_rate": "1.0483e-04", + "loss": 0.5497, + "slid_loss": 0.5555, + "step": 4544, + "time": 11.65 + }, + { + "epoch": 4.37, + "learning_rate": "1.0482e-04", + "loss": 0.5423, + "slid_loss": 0.5555, + "step": 4545, + "time": 13.22 + }, + { + "epoch": 4.37, + "learning_rate": "1.0481e-04", + "loss": 0.5609, + "slid_loss": 0.5549, + "step": 4546, + "time": 12.83 + }, + { + "epoch": 4.37, + "learning_rate": "1.0479e-04", + "loss": 0.4974, + "slid_loss": 0.5545, + "step": 4547, + "time": 12.37 + }, + { + "epoch": 4.37, + "learning_rate": "1.0478e-04", + "loss": 0.4871, + "slid_loss": 0.5537, + "step": 4548, + "time": 13.54 + }, + { + "epoch": 4.37, + "learning_rate": "1.0476e-04", + "loss": 0.5857, + "slid_loss": 0.5531, + "step": 4549, + "time": 12.77 + }, + { + "epoch": 4.37, + "learning_rate": "1.0475e-04", + "loss": 0.5199, + "slid_loss": 0.5531, + "step": 4550, + "time": 12.12 + }, + { + "epoch": 4.37, + "learning_rate": "1.0473e-04", + "loss": 0.5687, + "slid_loss": 0.5532, + "step": 4551, + "time": 13.45 + }, + { + "epoch": 4.37, + "learning_rate": "1.0472e-04", + "loss": 0.5518, + "slid_loss": 0.553, + "step": 4552, + "time": 11.91 + }, + { + "epoch": 4.37, + "learning_rate": "1.0471e-04", + "loss": 0.5513, + "slid_loss": 0.5529, + "step": 4553, + "time": 12.69 + }, + { + "epoch": 4.37, + "learning_rate": "1.0469e-04", + "loss": 0.6022, + "slid_loss": 0.5536, + "step": 4554, + "time": 13.21 + }, + { + "epoch": 4.38, + "learning_rate": "1.0468e-04", + "loss": 0.5103, + "slid_loss": 0.5534, + "step": 4555, + "time": 13.64 + }, + { + "epoch": 4.38, + "learning_rate": "1.0466e-04", + "loss": 0.5465, + "slid_loss": 0.5527, + "step": 4556, + "time": 13.19 + }, + { + "epoch": 4.38, + "learning_rate": "1.0465e-04", + "loss": 0.5244, + "slid_loss": 0.5514, + "step": 4557, + "time": 13.64 + }, + { + "epoch": 4.38, + "learning_rate": "1.0463e-04", + "loss": 0.5232, + "slid_loss": 0.5512, + "step": 
4558, + "time": 12.91 + }, + { + "epoch": 4.38, + "learning_rate": "1.0462e-04", + "loss": 0.4879, + "slid_loss": 0.5514, + "step": 4559, + "time": 13.85 + }, + { + "epoch": 4.38, + "learning_rate": "1.0461e-04", + "loss": 0.5309, + "slid_loss": 0.5511, + "step": 4560, + "time": 12.25 + }, + { + "epoch": 4.38, + "learning_rate": "1.0459e-04", + "loss": 0.5395, + "slid_loss": 0.5519, + "step": 4561, + "time": 12.24 + }, + { + "epoch": 4.38, + "learning_rate": "1.0458e-04", + "loss": 0.561, + "slid_loss": 0.5513, + "step": 4562, + "time": 12.85 + }, + { + "epoch": 4.38, + "learning_rate": "1.0456e-04", + "loss": 0.5507, + "slid_loss": 0.5514, + "step": 4563, + "time": 12.94 + }, + { + "epoch": 4.38, + "learning_rate": "1.0455e-04", + "loss": 0.5411, + "slid_loss": 0.5528, + "step": 4564, + "time": 13.06 + }, + { + "epoch": 4.39, + "learning_rate": "1.0454e-04", + "loss": 0.5512, + "slid_loss": 0.5536, + "step": 4565, + "time": 12.25 + }, + { + "epoch": 4.39, + "learning_rate": "1.0452e-04", + "loss": 0.5925, + "slid_loss": 0.5541, + "step": 4566, + "time": 13.25 + }, + { + "epoch": 4.39, + "learning_rate": "1.0451e-04", + "loss": 0.627, + "slid_loss": 0.5548, + "step": 4567, + "time": 13.62 + }, + { + "epoch": 4.39, + "learning_rate": "1.0449e-04", + "loss": 0.5893, + "slid_loss": 0.5555, + "step": 4568, + "time": 13.74 + }, + { + "epoch": 4.39, + "learning_rate": "1.0448e-04", + "loss": 0.5069, + "slid_loss": 0.5545, + "step": 4569, + "time": 11.82 + }, + { + "epoch": 4.39, + "learning_rate": "1.0447e-04", + "loss": 0.4982, + "slid_loss": 0.553, + "step": 4570, + "time": 13.31 + }, + { + "epoch": 4.39, + "learning_rate": "1.0445e-04", + "loss": 0.5289, + "slid_loss": 0.5531, + "step": 4571, + "time": 13.97 + }, + { + "epoch": 4.39, + "learning_rate": "1.0444e-04", + "loss": 0.527, + "slid_loss": 0.5533, + "step": 4572, + "time": 13.19 + }, + { + "epoch": 4.39, + "learning_rate": "1.0443e-04", + "loss": 0.5932, + "slid_loss": 0.5533, + "step": 4573, + "time": 11.97 + 
}, + { + "epoch": 4.39, + "learning_rate": "1.0441e-04", + "loss": 0.6591, + "slid_loss": 0.554, + "step": 4574, + "time": 12.57 + }, + { + "epoch": 4.39, + "learning_rate": "1.0440e-04", + "loss": 0.5524, + "slid_loss": 0.5535, + "step": 4575, + "time": 13.04 + }, + { + "epoch": 4.4, + "learning_rate": "1.0438e-04", + "loss": 0.5381, + "slid_loss": 0.5529, + "step": 4576, + "time": 13.71 + }, + { + "epoch": 4.4, + "learning_rate": "1.0437e-04", + "loss": 0.5816, + "slid_loss": 0.5532, + "step": 4577, + "time": 11.91 + }, + { + "epoch": 4.4, + "learning_rate": "1.0436e-04", + "loss": 0.5995, + "slid_loss": 0.5526, + "step": 4578, + "time": 12.92 + }, + { + "epoch": 4.4, + "learning_rate": "1.0434e-04", + "loss": 0.5709, + "slid_loss": 0.5523, + "step": 4579, + "time": 11.89 + }, + { + "epoch": 4.4, + "learning_rate": "1.0433e-04", + "loss": 0.4961, + "slid_loss": 0.5524, + "step": 4580, + "time": 13.33 + }, + { + "epoch": 4.4, + "learning_rate": "1.0432e-04", + "loss": 0.6488, + "slid_loss": 0.553, + "step": 4581, + "time": 14.17 + }, + { + "epoch": 4.4, + "learning_rate": "1.0430e-04", + "loss": 0.5538, + "slid_loss": 0.5528, + "step": 4582, + "time": 13.34 + }, + { + "epoch": 4.4, + "learning_rate": "1.0429e-04", + "loss": 0.4702, + "slid_loss": 0.5528, + "step": 4583, + "time": 12.6 + }, + { + "epoch": 4.4, + "learning_rate": "1.0427e-04", + "loss": 0.5966, + "slid_loss": 0.553, + "step": 4584, + "time": 13.4 + }, + { + "epoch": 4.4, + "learning_rate": "1.0426e-04", + "loss": 0.4817, + "slid_loss": 0.5529, + "step": 4585, + "time": 11.86 + }, + { + "epoch": 4.41, + "learning_rate": "1.0425e-04", + "loss": 0.6157, + "slid_loss": 0.554, + "step": 4586, + "time": 12.25 + }, + { + "epoch": 4.41, + "learning_rate": "1.0423e-04", + "loss": 0.4919, + "slid_loss": 0.5542, + "step": 4587, + "time": 13.8 + }, + { + "epoch": 4.41, + "learning_rate": "1.0422e-04", + "loss": 0.5944, + "slid_loss": 0.5545, + "step": 4588, + "time": 14.07 + }, + { + "epoch": 4.41, + 
"learning_rate": "1.0421e-04", + "loss": 0.5339, + "slid_loss": 0.5546, + "step": 4589, + "time": 13.56 + }, + { + "epoch": 4.41, + "learning_rate": "1.0419e-04", + "loss": 0.5287, + "slid_loss": 0.5548, + "step": 4590, + "time": 13.37 + }, + { + "epoch": 4.41, + "learning_rate": "1.0418e-04", + "loss": 0.4689, + "slid_loss": 0.554, + "step": 4591, + "time": 11.5 + }, + { + "epoch": 4.41, + "learning_rate": "1.0417e-04", + "loss": 0.5358, + "slid_loss": 0.5542, + "step": 4592, + "time": 12.17 + }, + { + "epoch": 4.41, + "learning_rate": "1.0415e-04", + "loss": 0.5776, + "slid_loss": 0.5549, + "step": 4593, + "time": 11.31 + }, + { + "epoch": 4.41, + "learning_rate": "1.0414e-04", + "loss": 0.5559, + "slid_loss": 0.5544, + "step": 4594, + "time": 13.2 + }, + { + "epoch": 4.41, + "learning_rate": "1.0413e-04", + "loss": 0.4763, + "slid_loss": 0.5539, + "step": 4595, + "time": 12.98 + }, + { + "epoch": 4.41, + "learning_rate": "1.0411e-04", + "loss": 0.5465, + "slid_loss": 0.5534, + "step": 4596, + "time": 12.86 + }, + { + "epoch": 4.42, + "learning_rate": "1.0410e-04", + "loss": 0.5669, + "slid_loss": 0.5532, + "step": 4597, + "time": 13.77 + }, + { + "epoch": 4.42, + "learning_rate": "1.0409e-04", + "loss": 0.5102, + "slid_loss": 0.5517, + "step": 4598, + "time": 13.76 + }, + { + "epoch": 4.42, + "learning_rate": "1.0407e-04", + "loss": 0.5412, + "slid_loss": 0.552, + "step": 4599, + "time": 12.95 + }, + { + "epoch": 4.42, + "learning_rate": "1.0406e-04", + "loss": 0.4852, + "slid_loss": 0.5514, + "step": 4600, + "time": 11.28 + }, + { + "epoch": 4.42, + "learning_rate": "1.0405e-04", + "loss": 0.5492, + "slid_loss": 0.5517, + "step": 4601, + "time": 13.69 + }, + { + "epoch": 4.42, + "learning_rate": "1.0403e-04", + "loss": 0.4833, + "slid_loss": 0.5506, + "step": 4602, + "time": 12.83 + }, + { + "epoch": 4.42, + "learning_rate": "1.0402e-04", + "loss": 0.6499, + "slid_loss": 0.5516, + "step": 4603, + "time": 13.69 + }, + { + "epoch": 4.42, + "learning_rate": 
"1.0401e-04", + "loss": 0.4999, + "slid_loss": 0.5505, + "step": 4604, + "time": 13.63 + }, + { + "epoch": 4.42, + "learning_rate": "1.0399e-04", + "loss": 0.5291, + "slid_loss": 0.5505, + "step": 4605, + "time": 12.35 + }, + { + "epoch": 4.42, + "learning_rate": "1.0398e-04", + "loss": 0.5613, + "slid_loss": 0.55, + "step": 4606, + "time": 12.02 + }, + { + "epoch": 4.43, + "learning_rate": "1.0397e-04", + "loss": 0.6657, + "slid_loss": 0.552, + "step": 4607, + "time": 14.47 + }, + { + "epoch": 4.43, + "learning_rate": "1.0395e-04", + "loss": 0.6183, + "slid_loss": 0.552, + "step": 4608, + "time": 12.57 + }, + { + "epoch": 4.43, + "learning_rate": "1.0394e-04", + "loss": 0.5911, + "slid_loss": 0.5535, + "step": 4609, + "time": 13.9 + }, + { + "epoch": 4.43, + "learning_rate": "1.0393e-04", + "loss": 0.589, + "slid_loss": 0.5539, + "step": 4610, + "time": 13.02 + }, + { + "epoch": 4.43, + "learning_rate": "1.0392e-04", + "loss": 0.5499, + "slid_loss": 0.5545, + "step": 4611, + "time": 12.81 + }, + { + "epoch": 4.43, + "learning_rate": "1.0390e-04", + "loss": 0.5483, + "slid_loss": 0.5543, + "step": 4612, + "time": 12.98 + }, + { + "epoch": 4.43, + "learning_rate": "1.0389e-04", + "loss": 0.5976, + "slid_loss": 0.5545, + "step": 4613, + "time": 13.44 + }, + { + "epoch": 4.43, + "learning_rate": "1.0388e-04", + "loss": 0.6254, + "slid_loss": 0.5553, + "step": 4614, + "time": 13.57 + }, + { + "epoch": 4.43, + "learning_rate": "1.0386e-04", + "loss": 0.6337, + "slid_loss": 0.5551, + "step": 4615, + "time": 12.73 + }, + { + "epoch": 4.43, + "learning_rate": "1.0385e-04", + "loss": 0.5324, + "slid_loss": 0.5548, + "step": 4616, + "time": 13.94 + }, + { + "epoch": 4.44, + "learning_rate": "1.0384e-04", + "loss": 0.5672, + "slid_loss": 0.5551, + "step": 4617, + "time": 14.06 + }, + { + "epoch": 4.44, + "learning_rate": "1.0383e-04", + "loss": 0.5773, + "slid_loss": 0.5552, + "step": 4618, + "time": 12.81 + }, + { + "epoch": 4.44, + "learning_rate": "1.0381e-04", + "loss": 
0.5793, + "slid_loss": 0.5548, + "step": 4619, + "time": 11.82 + }, + { + "epoch": 4.44, + "learning_rate": "1.0380e-04", + "loss": 0.5341, + "slid_loss": 0.5543, + "step": 4620, + "time": 13.12 + }, + { + "epoch": 4.44, + "learning_rate": "1.0379e-04", + "loss": 0.5612, + "slid_loss": 0.5542, + "step": 4621, + "time": 13.84 + }, + { + "epoch": 4.44, + "learning_rate": "1.0377e-04", + "loss": 0.4973, + "slid_loss": 0.5537, + "step": 4622, + "time": 14.14 + }, + { + "epoch": 4.44, + "learning_rate": "1.0376e-04", + "loss": 0.5334, + "slid_loss": 0.5542, + "step": 4623, + "time": 10.62 + }, + { + "epoch": 4.44, + "learning_rate": "1.0375e-04", + "loss": 0.5235, + "slid_loss": 0.5529, + "step": 4624, + "time": 13.42 + }, + { + "epoch": 4.44, + "learning_rate": "1.0374e-04", + "loss": 0.6071, + "slid_loss": 0.5536, + "step": 4625, + "time": 13.35 + }, + { + "epoch": 4.44, + "learning_rate": "1.0372e-04", + "loss": 0.4875, + "slid_loss": 0.5528, + "step": 4626, + "time": 13.48 + }, + { + "epoch": 4.44, + "learning_rate": "1.0371e-04", + "loss": 0.5622, + "slid_loss": 0.5532, + "step": 4627, + "time": 11.87 + }, + { + "epoch": 4.45, + "learning_rate": "1.0370e-04", + "loss": 0.5594, + "slid_loss": 0.5535, + "step": 4628, + "time": 13.68 + }, + { + "epoch": 4.45, + "learning_rate": "1.0369e-04", + "loss": 0.5694, + "slid_loss": 0.5525, + "step": 4629, + "time": 13.04 + }, + { + "epoch": 4.45, + "learning_rate": "1.0367e-04", + "loss": 0.5362, + "slid_loss": 0.5526, + "step": 4630, + "time": 14.1 + }, + { + "epoch": 4.45, + "learning_rate": "1.0366e-04", + "loss": 0.5336, + "slid_loss": 0.5522, + "step": 4631, + "time": 13.38 + }, + { + "epoch": 4.45, + "learning_rate": "1.0365e-04", + "loss": 0.5767, + "slid_loss": 0.5524, + "step": 4632, + "time": 13.89 + }, + { + "epoch": 4.45, + "learning_rate": "1.0363e-04", + "loss": 0.6667, + "slid_loss": 0.5533, + "step": 4633, + "time": 12.51 + }, + { + "epoch": 4.45, + "learning_rate": "1.0362e-04", + "loss": 0.4877, + 
"slid_loss": 0.5528, + "step": 4634, + "time": 10.89 + }, + { + "epoch": 4.45, + "learning_rate": "1.0361e-04", + "loss": 0.6421, + "slid_loss": 0.5538, + "step": 4635, + "time": 14.21 + }, + { + "epoch": 4.45, + "learning_rate": "1.0360e-04", + "loss": 0.4801, + "slid_loss": 0.5522, + "step": 4636, + "time": 14.13 + }, + { + "epoch": 4.45, + "learning_rate": "1.0358e-04", + "loss": 0.5815, + "slid_loss": 0.5526, + "step": 4637, + "time": 13.63 + }, + { + "epoch": 4.46, + "learning_rate": "1.0357e-04", + "loss": 0.6085, + "slid_loss": 0.553, + "step": 4638, + "time": 13.43 + }, + { + "epoch": 4.46, + "learning_rate": "1.0356e-04", + "loss": 0.5816, + "slid_loss": 0.554, + "step": 4639, + "time": 12.96 + }, + { + "epoch": 4.46, + "learning_rate": "1.0355e-04", + "loss": 0.6063, + "slid_loss": 0.5554, + "step": 4640, + "time": 13.35 + }, + { + "epoch": 4.46, + "learning_rate": "1.0353e-04", + "loss": 0.5629, + "slid_loss": 0.5558, + "step": 4641, + "time": 13.23 + }, + { + "epoch": 4.46, + "learning_rate": "1.0352e-04", + "loss": 0.5163, + "slid_loss": 0.5549, + "step": 4642, + "time": 12.78 + }, + { + "epoch": 4.46, + "learning_rate": "1.0351e-04", + "loss": 0.5287, + "slid_loss": 0.5542, + "step": 4643, + "time": 13.29 + }, + { + "epoch": 4.46, + "learning_rate": "1.0350e-04", + "loss": 0.4713, + "slid_loss": 0.5534, + "step": 4644, + "time": 14.05 + }, + { + "epoch": 4.46, + "learning_rate": "1.0349e-04", + "loss": 0.5099, + "slid_loss": 0.5531, + "step": 4645, + "time": 13.78 + }, + { + "epoch": 4.46, + "learning_rate": "1.0347e-04", + "loss": 0.5659, + "slid_loss": 0.5531, + "step": 4646, + "time": 11.35 + }, + { + "epoch": 4.46, + "learning_rate": "1.0346e-04", + "loss": 0.5787, + "slid_loss": 0.5539, + "step": 4647, + "time": 12.64 + }, + { + "epoch": 4.46, + "learning_rate": "1.0345e-04", + "loss": 0.418, + "slid_loss": 0.5532, + "step": 4648, + "time": 12.93 + }, + { + "epoch": 4.47, + "learning_rate": "1.0344e-04", + "loss": 0.4902, + "slid_loss": 0.5523, + 
"step": 4649, + "time": 13.84 + }, + { + "epoch": 4.47, + "learning_rate": "1.0342e-04", + "loss": 0.5935, + "slid_loss": 0.553, + "step": 4650, + "time": 11.42 + }, + { + "epoch": 4.47, + "learning_rate": "1.0341e-04", + "loss": 0.6261, + "slid_loss": 0.5536, + "step": 4651, + "time": 14.12 + }, + { + "epoch": 4.47, + "learning_rate": "1.0340e-04", + "loss": 0.5392, + "slid_loss": 0.5535, + "step": 4652, + "time": 13.34 + }, + { + "epoch": 4.47, + "learning_rate": "1.0339e-04", + "loss": 0.5094, + "slid_loss": 0.553, + "step": 4653, + "time": 13.3 + }, + { + "epoch": 4.47, + "learning_rate": "1.0338e-04", + "loss": 0.5454, + "slid_loss": 0.5525, + "step": 4654, + "time": 13.03 + }, + { + "epoch": 4.47, + "learning_rate": "1.0336e-04", + "loss": 0.5727, + "slid_loss": 0.5531, + "step": 4655, + "time": 11.03 + }, + { + "epoch": 4.47, + "learning_rate": "1.0335e-04", + "loss": 0.5821, + "slid_loss": 0.5534, + "step": 4656, + "time": 13.36 + }, + { + "epoch": 4.47, + "learning_rate": "1.0334e-04", + "loss": 0.5104, + "slid_loss": 0.5533, + "step": 4657, + "time": 13.58 + }, + { + "epoch": 4.47, + "learning_rate": "1.0333e-04", + "loss": 0.6151, + "slid_loss": 0.5542, + "step": 4658, + "time": 13.33 + }, + { + "epoch": 4.48, + "learning_rate": "1.0332e-04", + "loss": 0.4998, + "slid_loss": 0.5543, + "step": 4659, + "time": 12.87 + }, + { + "epoch": 4.48, + "learning_rate": "1.0330e-04", + "loss": 0.5408, + "slid_loss": 0.5544, + "step": 4660, + "time": 13.35 + }, + { + "epoch": 4.48, + "learning_rate": "1.0329e-04", + "loss": 0.523, + "slid_loss": 0.5543, + "step": 4661, + "time": 14.2 + }, + { + "epoch": 4.48, + "learning_rate": "1.0328e-04", + "loss": 0.5254, + "slid_loss": 0.5539, + "step": 4662, + "time": 12.9 + }, + { + "epoch": 4.48, + "learning_rate": "1.0327e-04", + "loss": 0.4647, + "slid_loss": 0.5531, + "step": 4663, + "time": 13.79 + }, + { + "epoch": 4.48, + "learning_rate": "1.0326e-04", + "loss": 0.5454, + "slid_loss": 0.5531, + "step": 4664, + "time": 
13.39 + }, + { + "epoch": 4.48, + "learning_rate": "1.0324e-04", + "loss": 0.4749, + "slid_loss": 0.5523, + "step": 4665, + "time": 13.58 + }, + { + "epoch": 4.48, + "learning_rate": "1.0323e-04", + "loss": 0.5637, + "slid_loss": 0.5521, + "step": 4666, + "time": 13.44 + }, + { + "epoch": 4.48, + "learning_rate": "1.0322e-04", + "loss": 0.5824, + "slid_loss": 0.5516, + "step": 4667, + "time": 12.88 + }, + { + "epoch": 4.48, + "learning_rate": "1.0321e-04", + "loss": 0.5336, + "slid_loss": 0.5511, + "step": 4668, + "time": 13.73 + }, + { + "epoch": 4.49, + "learning_rate": "1.0320e-04", + "loss": 0.5507, + "slid_loss": 0.5515, + "step": 4669, + "time": 12.87 + }, + { + "epoch": 4.49, + "learning_rate": "1.0318e-04", + "loss": 0.5363, + "slid_loss": 0.5519, + "step": 4670, + "time": 13.18 + }, + { + "epoch": 4.49, + "learning_rate": "1.0317e-04", + "loss": 0.5987, + "slid_loss": 0.5526, + "step": 4671, + "time": 13.45 + }, + { + "epoch": 4.49, + "learning_rate": "1.0316e-04", + "loss": 0.4834, + "slid_loss": 0.5521, + "step": 4672, + "time": 13.96 + }, + { + "epoch": 4.49, + "learning_rate": "1.0315e-04", + "loss": 0.4996, + "slid_loss": 0.5512, + "step": 4673, + "time": 12.85 + }, + { + "epoch": 4.49, + "learning_rate": "1.0314e-04", + "loss": 0.5675, + "slid_loss": 0.5503, + "step": 4674, + "time": 12.98 + }, + { + "epoch": 4.49, + "learning_rate": "1.0313e-04", + "loss": 0.5412, + "slid_loss": 0.5502, + "step": 4675, + "time": 13.3 + }, + { + "epoch": 4.49, + "learning_rate": "1.0311e-04", + "loss": 0.4883, + "slid_loss": 0.5497, + "step": 4676, + "time": 12.04 + }, + { + "epoch": 4.49, + "learning_rate": "1.0310e-04", + "loss": 0.5627, + "slid_loss": 0.5495, + "step": 4677, + "time": 11.45 + }, + { + "epoch": 4.49, + "learning_rate": "1.0309e-04", + "loss": 0.5914, + "slid_loss": 0.5494, + "step": 4678, + "time": 11.44 + }, + { + "epoch": 4.49, + "learning_rate": "1.0308e-04", + "loss": 0.5366, + "slid_loss": 0.5491, + "step": 4679, + "time": 12.81 + }, + { + 
"epoch": 4.5, + "learning_rate": "1.0307e-04", + "loss": 0.5133, + "slid_loss": 0.5492, + "step": 4680, + "time": 13.0 + }, + { + "epoch": 4.5, + "learning_rate": "1.0306e-04", + "loss": 0.5782, + "slid_loss": 0.5485, + "step": 4681, + "time": 12.89 + }, + { + "epoch": 4.5, + "learning_rate": "1.0304e-04", + "loss": 0.4773, + "slid_loss": 0.5478, + "step": 4682, + "time": 13.38 + }, + { + "epoch": 4.5, + "learning_rate": "1.0303e-04", + "loss": 0.4921, + "slid_loss": 0.548, + "step": 4683, + "time": 11.96 + }, + { + "epoch": 4.5, + "learning_rate": "1.0302e-04", + "loss": 0.5554, + "slid_loss": 0.5476, + "step": 4684, + "time": 12.97 + }, + { + "epoch": 4.5, + "learning_rate": "1.0301e-04", + "loss": 0.6223, + "slid_loss": 0.549, + "step": 4685, + "time": 13.52 + }, + { + "epoch": 4.5, + "learning_rate": "1.0300e-04", + "loss": 0.5961, + "slid_loss": 0.5488, + "step": 4686, + "time": 13.85 + }, + { + "epoch": 4.5, + "learning_rate": "1.0299e-04", + "loss": 0.5749, + "slid_loss": 0.5496, + "step": 4687, + "time": 13.62 + }, + { + "epoch": 4.5, + "learning_rate": "1.0298e-04", + "loss": 0.517, + "slid_loss": 0.5488, + "step": 4688, + "time": 12.07 + }, + { + "epoch": 4.5, + "learning_rate": "1.0296e-04", + "loss": 0.5318, + "slid_loss": 0.5488, + "step": 4689, + "time": 12.23 + }, + { + "epoch": 4.51, + "learning_rate": "1.0295e-04", + "loss": 0.576, + "slid_loss": 0.5493, + "step": 4690, + "time": 13.16 + }, + { + "epoch": 4.51, + "learning_rate": "1.0294e-04", + "loss": 0.5547, + "slid_loss": 0.5501, + "step": 4691, + "time": 12.62 + }, + { + "epoch": 4.51, + "learning_rate": "1.0293e-04", + "loss": 0.5048, + "slid_loss": 0.5498, + "step": 4692, + "time": 13.95 + }, + { + "epoch": 4.51, + "learning_rate": "1.0292e-04", + "loss": 0.5649, + "slid_loss": 0.5497, + "step": 4693, + "time": 13.2 + }, + { + "epoch": 4.51, + "learning_rate": "1.0291e-04", + "loss": 0.5191, + "slid_loss": 0.5493, + "step": 4694, + "time": 12.93 + }, + { + "epoch": 4.51, + "learning_rate": 
"1.0290e-04", + "loss": 0.5293, + "slid_loss": 0.5499, + "step": 4695, + "time": 12.88 + }, + { + "epoch": 4.51, + "learning_rate": "1.0289e-04", + "loss": 0.5833, + "slid_loss": 0.5502, + "step": 4696, + "time": 14.23 + }, + { + "epoch": 4.51, + "learning_rate": "1.0287e-04", + "loss": 0.5538, + "slid_loss": 0.5501, + "step": 4697, + "time": 13.23 + }, + { + "epoch": 4.51, + "learning_rate": "1.0286e-04", + "loss": 0.5292, + "slid_loss": 0.5503, + "step": 4698, + "time": 13.27 + }, + { + "epoch": 4.51, + "learning_rate": "1.0285e-04", + "loss": 0.5432, + "slid_loss": 0.5503, + "step": 4699, + "time": 13.34 + }, + { + "epoch": 4.51, + "learning_rate": "1.0284e-04", + "loss": 0.5262, + "slid_loss": 0.5507, + "step": 4700, + "time": 12.86 + }, + { + "epoch": 4.52, + "learning_rate": "1.0283e-04", + "loss": 0.522, + "slid_loss": 0.5505, + "step": 4701, + "time": 14.95 + }, + { + "epoch": 4.52, + "learning_rate": "1.0282e-04", + "loss": 0.5277, + "slid_loss": 0.5509, + "step": 4702, + "time": 10.61 + }, + { + "epoch": 4.52, + "learning_rate": "1.0281e-04", + "loss": 0.4832, + "slid_loss": 0.5492, + "step": 4703, + "time": 13.81 + }, + { + "epoch": 4.52, + "learning_rate": "1.0280e-04", + "loss": 0.569, + "slid_loss": 0.5499, + "step": 4704, + "time": 13.12 + }, + { + "epoch": 4.52, + "learning_rate": "1.0279e-04", + "loss": 0.5246, + "slid_loss": 0.5499, + "step": 4705, + "time": 13.4 + }, + { + "epoch": 4.52, + "learning_rate": "1.0277e-04", + "loss": 0.5187, + "slid_loss": 0.5494, + "step": 4706, + "time": 13.64 + }, + { + "epoch": 4.52, + "learning_rate": "1.0276e-04", + "loss": 0.483, + "slid_loss": 0.5476, + "step": 4707, + "time": 12.29 + }, + { + "epoch": 4.52, + "learning_rate": "1.0275e-04", + "loss": 0.5577, + "slid_loss": 0.547, + "step": 4708, + "time": 13.52 + }, + { + "epoch": 4.52, + "learning_rate": "1.0274e-04", + "loss": 0.5263, + "slid_loss": 0.5464, + "step": 4709, + "time": 12.32 + }, + { + "epoch": 4.52, + "learning_rate": "1.0273e-04", + "loss": 
0.55, + "slid_loss": 0.546, + "step": 4710, + "time": 13.81 + }, + { + "epoch": 4.53, + "learning_rate": "1.0272e-04", + "loss": 0.571, + "slid_loss": 0.5462, + "step": 4711, + "time": 11.53 + }, + { + "epoch": 4.53, + "learning_rate": "1.0271e-04", + "loss": 0.5602, + "slid_loss": 0.5463, + "step": 4712, + "time": 12.68 + }, + { + "epoch": 4.53, + "learning_rate": "1.0270e-04", + "loss": 0.5305, + "slid_loss": 0.5456, + "step": 4713, + "time": 13.27 + }, + { + "epoch": 4.53, + "learning_rate": "1.0269e-04", + "loss": 0.4363, + "slid_loss": 0.5437, + "step": 4714, + "time": 14.28 + }, + { + "epoch": 4.53, + "learning_rate": "1.0268e-04", + "loss": 0.5301, + "slid_loss": 0.5427, + "step": 4715, + "time": 13.79 + }, + { + "epoch": 4.53, + "learning_rate": "1.0267e-04", + "loss": 0.6129, + "slid_loss": 0.5435, + "step": 4716, + "time": 13.39 + }, + { + "epoch": 4.53, + "learning_rate": "1.0265e-04", + "loss": 0.53, + "slid_loss": 0.5431, + "step": 4717, + "time": 12.34 + }, + { + "epoch": 4.53, + "learning_rate": "1.0264e-04", + "loss": 0.5885, + "slid_loss": 0.5433, + "step": 4718, + "time": 12.38 + }, + { + "epoch": 4.53, + "learning_rate": "1.0263e-04", + "loss": 0.5413, + "slid_loss": 0.5429, + "step": 4719, + "time": 12.09 + }, + { + "epoch": 4.53, + "learning_rate": "1.0262e-04", + "loss": 0.596, + "slid_loss": 0.5435, + "step": 4720, + "time": 13.6 + }, + { + "epoch": 4.54, + "learning_rate": "1.0261e-04", + "loss": 0.513, + "slid_loss": 0.543, + "step": 4721, + "time": 12.93 + }, + { + "epoch": 4.54, + "learning_rate": "1.0260e-04", + "loss": 0.5262, + "slid_loss": 0.5433, + "step": 4722, + "time": 13.89 + }, + { + "epoch": 4.54, + "learning_rate": "1.0259e-04", + "loss": 0.5726, + "slid_loss": 0.5437, + "step": 4723, + "time": 13.66 + }, + { + "epoch": 4.54, + "learning_rate": "1.0258e-04", + "loss": 0.5686, + "slid_loss": 0.5441, + "step": 4724, + "time": 12.17 + }, + { + "epoch": 4.54, + "learning_rate": "1.0257e-04", + "loss": 0.5895, + "slid_loss": 0.544, 
+ "step": 4725, + "time": 13.04 + }, + { + "epoch": 4.54, + "learning_rate": "1.0256e-04", + "loss": 0.5341, + "slid_loss": 0.5444, + "step": 4726, + "time": 14.04 + }, + { + "epoch": 4.54, + "learning_rate": "1.0255e-04", + "loss": 0.5201, + "slid_loss": 0.544, + "step": 4727, + "time": 13.33 + }, + { + "epoch": 4.54, + "learning_rate": "1.0254e-04", + "loss": 0.562, + "slid_loss": 0.544, + "step": 4728, + "time": 12.71 + }, + { + "epoch": 4.54, + "learning_rate": "1.0253e-04", + "loss": 0.573, + "slid_loss": 0.5441, + "step": 4729, + "time": 13.28 + }, + { + "epoch": 4.54, + "learning_rate": "1.0252e-04", + "loss": 0.4773, + "slid_loss": 0.5435, + "step": 4730, + "time": 11.68 + }, + { + "epoch": 4.54, + "learning_rate": "1.0251e-04", + "loss": 0.5289, + "slid_loss": 0.5434, + "step": 4731, + "time": 11.54 + }, + { + "epoch": 4.55, + "learning_rate": "1.0250e-04", + "loss": 0.545, + "slid_loss": 0.5431, + "step": 4732, + "time": 12.9 + }, + { + "epoch": 4.55, + "learning_rate": "1.0248e-04", + "loss": 0.5899, + "slid_loss": 0.5424, + "step": 4733, + "time": 13.25 + }, + { + "epoch": 4.55, + "learning_rate": "1.0247e-04", + "loss": 0.5469, + "slid_loss": 0.5429, + "step": 4734, + "time": 12.79 + }, + { + "epoch": 4.55, + "learning_rate": "1.0246e-04", + "loss": 0.5054, + "slid_loss": 0.5416, + "step": 4735, + "time": 13.75 + }, + { + "epoch": 4.55, + "learning_rate": "1.0245e-04", + "loss": 0.5483, + "slid_loss": 0.5423, + "step": 4736, + "time": 12.68 + }, + { + "epoch": 4.55, + "learning_rate": "1.0244e-04", + "loss": 0.5817, + "slid_loss": 0.5423, + "step": 4737, + "time": 14.05 + }, + { + "epoch": 4.55, + "learning_rate": "1.0243e-04", + "loss": 0.5196, + "slid_loss": 0.5414, + "step": 4738, + "time": 13.56 + }, + { + "epoch": 4.55, + "learning_rate": "1.0242e-04", + "loss": 0.5095, + "slid_loss": 0.5407, + "step": 4739, + "time": 12.04 + }, + { + "epoch": 4.55, + "learning_rate": "1.0241e-04", + "loss": 0.5494, + "slid_loss": 0.5401, + "step": 4740, + "time": 
13.33 + }, + { + "epoch": 4.55, + "learning_rate": "1.0240e-04", + "loss": 0.5926, + "slid_loss": 0.5404, + "step": 4741, + "time": 12.97 + }, + { + "epoch": 4.56, + "learning_rate": "1.0239e-04", + "loss": 0.5782, + "slid_loss": 0.541, + "step": 4742, + "time": 13.0 + }, + { + "epoch": 4.56, + "learning_rate": "1.0238e-04", + "loss": 0.5186, + "slid_loss": 0.5409, + "step": 4743, + "time": 12.91 + }, + { + "epoch": 4.56, + "learning_rate": "1.0237e-04", + "loss": 0.5384, + "slid_loss": 0.5416, + "step": 4744, + "time": 11.89 + }, + { + "epoch": 4.56, + "learning_rate": "1.0236e-04", + "loss": 0.4831, + "slid_loss": 0.5413, + "step": 4745, + "time": 12.47 + }, + { + "epoch": 4.56, + "learning_rate": "1.0235e-04", + "loss": 0.5764, + "slid_loss": 0.5414, + "step": 4746, + "time": 12.98 + }, + { + "epoch": 4.56, + "learning_rate": "1.0234e-04", + "loss": 0.5058, + "slid_loss": 0.5407, + "step": 4747, + "time": 13.74 + }, + { + "epoch": 4.56, + "learning_rate": "1.0233e-04", + "loss": 0.5507, + "slid_loss": 0.542, + "step": 4748, + "time": 13.42 + }, + { + "epoch": 4.56, + "learning_rate": "1.0232e-04", + "loss": 0.535, + "slid_loss": 0.5425, + "step": 4749, + "time": 13.18 + }, + { + "epoch": 4.56, + "learning_rate": "1.0231e-04", + "loss": 0.5451, + "slid_loss": 0.542, + "step": 4750, + "time": 13.39 + }, + { + "epoch": 4.56, + "learning_rate": "1.0230e-04", + "loss": 0.6298, + "slid_loss": 0.542, + "step": 4751, + "time": 12.84 + }, + { + "epoch": 4.56, + "learning_rate": "1.0229e-04", + "loss": 0.5798, + "slid_loss": 0.5424, + "step": 4752, + "time": 12.83 + }, + { + "epoch": 4.57, + "learning_rate": "1.0228e-04", + "loss": 0.6316, + "slid_loss": 0.5436, + "step": 4753, + "time": 13.97 + }, + { + "epoch": 4.57, + "learning_rate": "1.0227e-04", + "loss": 0.619, + "slid_loss": 0.5444, + "step": 4754, + "time": 12.78 + }, + { + "epoch": 4.57, + "learning_rate": "1.0226e-04", + "loss": 0.5301, + "slid_loss": 0.5439, + "step": 4755, + "time": 13.45 + }, + { + "epoch": 
4.57, + "learning_rate": "1.0225e-04", + "loss": 0.5216, + "slid_loss": 0.5433, + "step": 4756, + "time": 13.17 + }, + { + "epoch": 4.57, + "learning_rate": "1.0224e-04", + "loss": 0.5148, + "slid_loss": 0.5434, + "step": 4757, + "time": 13.2 + }, + { + "epoch": 4.57, + "learning_rate": "1.0223e-04", + "loss": 0.4781, + "slid_loss": 0.542, + "step": 4758, + "time": 13.42 + }, + { + "epoch": 4.57, + "learning_rate": "1.0222e-04", + "loss": 0.447, + "slid_loss": 0.5415, + "step": 4759, + "time": 13.24 + }, + { + "epoch": 4.57, + "learning_rate": "1.0221e-04", + "loss": 0.4941, + "slid_loss": 0.541, + "step": 4760, + "time": 12.86 + }, + { + "epoch": 4.57, + "learning_rate": "1.0220e-04", + "loss": 0.5129, + "slid_loss": 0.5409, + "step": 4761, + "time": 13.3 + }, + { + "epoch": 4.57, + "learning_rate": "1.0219e-04", + "loss": 0.5654, + "slid_loss": 0.5413, + "step": 4762, + "time": 11.38 + }, + { + "epoch": 4.58, + "learning_rate": "1.0218e-04", + "loss": 0.524, + "slid_loss": 0.5419, + "step": 4763, + "time": 13.47 + }, + { + "epoch": 4.58, + "learning_rate": "1.0217e-04", + "loss": 0.508, + "slid_loss": 0.5415, + "step": 4764, + "time": 13.3 + }, + { + "epoch": 4.58, + "learning_rate": "1.0216e-04", + "loss": 0.5876, + "slid_loss": 0.5427, + "step": 4765, + "time": 11.86 + }, + { + "epoch": 4.58, + "learning_rate": "1.0215e-04", + "loss": 0.5516, + "slid_loss": 0.5425, + "step": 4766, + "time": 13.41 + }, + { + "epoch": 4.58, + "learning_rate": "1.0214e-04", + "loss": 0.5895, + "slid_loss": 0.5426, + "step": 4767, + "time": 11.75 + }, + { + "epoch": 4.58, + "learning_rate": "1.0213e-04", + "loss": 0.6036, + "slid_loss": 0.5433, + "step": 4768, + "time": 12.22 + }, + { + "epoch": 4.58, + "learning_rate": "1.0212e-04", + "loss": 0.4842, + "slid_loss": 0.5427, + "step": 4769, + "time": 14.2 + }, + { + "epoch": 4.58, + "learning_rate": "1.0211e-04", + "loss": 0.5387, + "slid_loss": 0.5427, + "step": 4770, + "time": 12.76 + }, + { + "epoch": 4.58, + "learning_rate": 
"1.0210e-04", + "loss": 0.5536, + "slid_loss": 0.5422, + "step": 4771, + "time": 11.43 + }, + { + "epoch": 4.58, + "learning_rate": "1.0209e-04", + "loss": 0.5464, + "slid_loss": 0.5429, + "step": 4772, + "time": 13.4 + }, + { + "epoch": 4.59, + "learning_rate": "1.0208e-04", + "loss": 0.4983, + "slid_loss": 0.5428, + "step": 4773, + "time": 12.98 + }, + { + "epoch": 4.59, + "learning_rate": "1.0207e-04", + "loss": 0.5401, + "slid_loss": 0.5426, + "step": 4774, + "time": 13.93 + }, + { + "epoch": 4.59, + "learning_rate": "1.0207e-04", + "loss": 0.5177, + "slid_loss": 0.5423, + "step": 4775, + "time": 13.72 + }, + { + "epoch": 4.59, + "learning_rate": "1.0206e-04", + "loss": 0.5534, + "slid_loss": 0.543, + "step": 4776, + "time": 13.91 + }, + { + "epoch": 4.59, + "learning_rate": "1.0205e-04", + "loss": 0.4727, + "slid_loss": 0.5421, + "step": 4777, + "time": 12.35 + }, + { + "epoch": 4.59, + "learning_rate": "1.0204e-04", + "loss": 0.5425, + "slid_loss": 0.5416, + "step": 4778, + "time": 12.97 + }, + { + "epoch": 4.59, + "learning_rate": "1.0203e-04", + "loss": 0.5913, + "slid_loss": 0.5421, + "step": 4779, + "time": 14.16 + }, + { + "epoch": 4.59, + "learning_rate": "1.0202e-04", + "loss": 0.4712, + "slid_loss": 0.5417, + "step": 4780, + "time": 12.23 + }, + { + "epoch": 4.59, + "learning_rate": "1.0201e-04", + "loss": 0.5199, + "slid_loss": 0.5411, + "step": 4781, + "time": 14.38 + }, + { + "epoch": 4.59, + "learning_rate": "1.0200e-04", + "loss": 0.5233, + "slid_loss": 0.5416, + "step": 4782, + "time": 13.77 + }, + { + "epoch": 4.59, + "learning_rate": "1.0199e-04", + "loss": 0.5421, + "slid_loss": 0.5421, + "step": 4783, + "time": 13.19 + }, + { + "epoch": 4.6, + "learning_rate": "1.0198e-04", + "loss": 0.5507, + "slid_loss": 0.5421, + "step": 4784, + "time": 13.39 + }, + { + "epoch": 4.6, + "learning_rate": "1.0197e-04", + "loss": 0.5617, + "slid_loss": 0.5414, + "step": 4785, + "time": 14.0 + }, + { + "epoch": 4.6, + "learning_rate": "1.0196e-04", + "loss": 
0.4796, + "slid_loss": 0.5403, + "step": 4786, + "time": 13.62 + }, + { + "epoch": 4.6, + "learning_rate": "1.0195e-04", + "loss": 0.4877, + "slid_loss": 0.5394, + "step": 4787, + "time": 13.93 + }, + { + "epoch": 4.6, + "learning_rate": "1.0194e-04", + "loss": 0.5647, + "slid_loss": 0.5399, + "step": 4788, + "time": 13.39 + }, + { + "epoch": 4.6, + "learning_rate": "1.0193e-04", + "loss": 0.4568, + "slid_loss": 0.5391, + "step": 4789, + "time": 13.87 + }, + { + "epoch": 4.6, + "learning_rate": "1.0192e-04", + "loss": 0.4782, + "slid_loss": 0.5382, + "step": 4790, + "time": 11.93 + }, + { + "epoch": 4.6, + "learning_rate": "1.0192e-04", + "loss": 0.5409, + "slid_loss": 0.538, + "step": 4791, + "time": 12.62 + }, + { + "epoch": 4.6, + "learning_rate": "1.0191e-04", + "loss": 0.5955, + "slid_loss": 0.5389, + "step": 4792, + "time": 11.76 + }, + { + "epoch": 4.6, + "learning_rate": "1.0190e-04", + "loss": 0.5565, + "slid_loss": 0.5388, + "step": 4793, + "time": 13.1 + }, + { + "epoch": 4.61, + "learning_rate": "1.0189e-04", + "loss": 0.4946, + "slid_loss": 0.5386, + "step": 4794, + "time": 13.62 + }, + { + "epoch": 4.61, + "learning_rate": "1.0188e-04", + "loss": 0.5959, + "slid_loss": 0.5393, + "step": 4795, + "time": 13.72 + }, + { + "epoch": 4.61, + "learning_rate": "1.0187e-04", + "loss": 0.4435, + "slid_loss": 0.5379, + "step": 4796, + "time": 13.64 + }, + { + "epoch": 4.61, + "learning_rate": "1.0186e-04", + "loss": 0.5816, + "slid_loss": 0.5381, + "step": 4797, + "time": 11.35 + }, + { + "epoch": 4.61, + "learning_rate": "1.0185e-04", + "loss": 0.5437, + "slid_loss": 0.5383, + "step": 4798, + "time": 13.96 + }, + { + "epoch": 4.61, + "learning_rate": "1.0184e-04", + "loss": 0.5581, + "slid_loss": 0.5384, + "step": 4799, + "time": 12.81 + }, + { + "epoch": 4.61, + "learning_rate": "1.0183e-04", + "loss": 0.564, + "slid_loss": 0.5388, + "step": 4800, + "time": 12.32 + }, + { + "epoch": 4.61, + "learning_rate": "1.0182e-04", + "loss": 0.5373, + "slid_loss": 0.539, 
+ "step": 4801, + "time": 13.16 + }, + { + "epoch": 4.61, + "learning_rate": "1.0182e-04", + "loss": 0.5305, + "slid_loss": 0.539, + "step": 4802, + "time": 13.7 + }, + { + "epoch": 4.61, + "learning_rate": "1.0181e-04", + "loss": 0.4863, + "slid_loss": 0.539, + "step": 4803, + "time": 13.77 + }, + { + "epoch": 4.61, + "learning_rate": "1.0180e-04", + "loss": 0.5463, + "slid_loss": 0.5388, + "step": 4804, + "time": 11.69 + }, + { + "epoch": 4.62, + "learning_rate": "1.0179e-04", + "loss": 0.4792, + "slid_loss": 0.5383, + "step": 4805, + "time": 14.12 + }, + { + "epoch": 4.62, + "learning_rate": "1.0178e-04", + "loss": 0.4802, + "slid_loss": 0.538, + "step": 4806, + "time": 13.33 + }, + { + "epoch": 4.62, + "learning_rate": "1.0177e-04", + "loss": 0.5645, + "slid_loss": 0.5388, + "step": 4807, + "time": 13.32 + }, + { + "epoch": 4.62, + "learning_rate": "1.0176e-04", + "loss": 0.5218, + "slid_loss": 0.5384, + "step": 4808, + "time": 13.73 + }, + { + "epoch": 4.62, + "learning_rate": "1.0175e-04", + "loss": 0.5438, + "slid_loss": 0.5386, + "step": 4809, + "time": 11.07 + }, + { + "epoch": 4.62, + "learning_rate": "1.0174e-04", + "loss": 0.4951, + "slid_loss": 0.538, + "step": 4810, + "time": 13.02 + }, + { + "epoch": 4.62, + "learning_rate": "1.0174e-04", + "loss": 0.4749, + "slid_loss": 0.5371, + "step": 4811, + "time": 13.35 + }, + { + "epoch": 4.62, + "learning_rate": "1.0173e-04", + "loss": 0.4724, + "slid_loss": 0.5362, + "step": 4812, + "time": 12.27 + }, + { + "epoch": 4.62, + "learning_rate": "1.0172e-04", + "loss": 0.535, + "slid_loss": 0.5362, + "step": 4813, + "time": 11.1 + }, + { + "epoch": 4.62, + "learning_rate": "1.0171e-04", + "loss": 0.563, + "slid_loss": 0.5375, + "step": 4814, + "time": 12.89 + }, + { + "epoch": 4.63, + "learning_rate": "1.0170e-04", + "loss": 0.5637, + "slid_loss": 0.5379, + "step": 4815, + "time": 13.18 + }, + { + "epoch": 4.63, + "learning_rate": "1.0169e-04", + "loss": 0.5747, + "slid_loss": 0.5375, + "step": 4816, + "time": 
13.72 + }, + { + "epoch": 4.63, + "learning_rate": "1.0168e-04", + "loss": 0.472, + "slid_loss": 0.5369, + "step": 4817, + "time": 13.28 + }, + { + "epoch": 4.63, + "learning_rate": "1.0167e-04", + "loss": 0.6013, + "slid_loss": 0.537, + "step": 4818, + "time": 13.83 + }, + { + "epoch": 4.63, + "learning_rate": "1.0167e-04", + "loss": 0.5446, + "slid_loss": 0.5371, + "step": 4819, + "time": 12.02 + }, + { + "epoch": 4.63, + "learning_rate": "1.0166e-04", + "loss": 0.5393, + "slid_loss": 0.5365, + "step": 4820, + "time": 13.69 + }, + { + "epoch": 4.63, + "learning_rate": "1.0165e-04", + "loss": 0.638, + "slid_loss": 0.5377, + "step": 4821, + "time": 13.4 + }, + { + "epoch": 4.63, + "learning_rate": "1.0164e-04", + "loss": 0.5387, + "slid_loss": 0.5379, + "step": 4822, + "time": 13.58 + }, + { + "epoch": 4.63, + "learning_rate": "1.0163e-04", + "loss": 0.5703, + "slid_loss": 0.5378, + "step": 4823, + "time": 12.88 + }, + { + "epoch": 4.63, + "learning_rate": "1.0162e-04", + "loss": 0.5315, + "slid_loss": 0.5375, + "step": 4824, + "time": 12.82 + }, + { + "epoch": 4.63, + "learning_rate": "1.0162e-04", + "loss": 0.6047, + "slid_loss": 0.5376, + "step": 4825, + "time": 11.26 + }, + { + "epoch": 4.64, + "learning_rate": "1.0161e-04", + "loss": 0.5411, + "slid_loss": 0.5377, + "step": 4826, + "time": 12.11 + }, + { + "epoch": 4.64, + "learning_rate": "1.0160e-04", + "loss": 0.5387, + "slid_loss": 0.5379, + "step": 4827, + "time": 13.34 + }, + { + "epoch": 4.64, + "learning_rate": "1.0159e-04", + "loss": 0.4582, + "slid_loss": 0.5368, + "step": 4828, + "time": 12.89 + }, + { + "epoch": 4.64, + "learning_rate": "1.0158e-04", + "loss": 0.4897, + "slid_loss": 0.536, + "step": 4829, + "time": 13.16 + }, + { + "epoch": 4.64, + "learning_rate": "1.0157e-04", + "loss": 0.5776, + "slid_loss": 0.537, + "step": 4830, + "time": 13.29 + }, + { + "epoch": 4.64, + "learning_rate": "1.0156e-04", + "loss": 0.5501, + "slid_loss": 0.5372, + "step": 4831, + "time": 13.51 + }, + { + "epoch": 
4.64, + "learning_rate": "1.0156e-04", + "loss": 0.6115, + "slid_loss": 0.5379, + "step": 4832, + "time": 11.29 + }, + { + "epoch": 4.64, + "learning_rate": "1.0155e-04", + "loss": 0.5389, + "slid_loss": 0.5374, + "step": 4833, + "time": 11.36 + }, + { + "epoch": 4.64, + "learning_rate": "1.0154e-04", + "loss": 0.5609, + "slid_loss": 0.5375, + "step": 4834, + "time": 12.92 + }, + { + "epoch": 4.64, + "learning_rate": "1.0153e-04", + "loss": 0.5405, + "slid_loss": 0.5379, + "step": 4835, + "time": 12.81 + }, + { + "epoch": 4.65, + "learning_rate": "1.0152e-04", + "loss": 0.5282, + "slid_loss": 0.5377, + "step": 4836, + "time": 13.6 + }, + { + "epoch": 4.65, + "learning_rate": "1.0152e-04", + "loss": 0.4853, + "slid_loss": 0.5367, + "step": 4837, + "time": 12.4 + }, + { + "epoch": 4.65, + "learning_rate": "1.0151e-04", + "loss": 0.4882, + "slid_loss": 0.5364, + "step": 4838, + "time": 11.86 + }, + { + "epoch": 4.65, + "learning_rate": "1.0150e-04", + "loss": 0.5861, + "slid_loss": 0.5371, + "step": 4839, + "time": 12.96 + }, + { + "epoch": 4.65, + "learning_rate": "1.0149e-04", + "loss": 0.5277, + "slid_loss": 0.5369, + "step": 4840, + "time": 12.24 + }, + { + "epoch": 4.65, + "learning_rate": "1.0148e-04", + "loss": 0.5788, + "slid_loss": 0.5368, + "step": 4841, + "time": 13.37 + }, + { + "epoch": 4.65, + "learning_rate": "1.0147e-04", + "loss": 0.5523, + "slid_loss": 0.5365, + "step": 4842, + "time": 12.74 + }, + { + "epoch": 4.65, + "learning_rate": "1.0147e-04", + "loss": 0.4296, + "slid_loss": 0.5356, + "step": 4843, + "time": 14.0 + }, + { + "epoch": 4.65, + "learning_rate": "1.0146e-04", + "loss": 0.5319, + "slid_loss": 0.5356, + "step": 4844, + "time": 14.01 + }, + { + "epoch": 4.65, + "learning_rate": "1.0145e-04", + "loss": 0.5942, + "slid_loss": 0.5367, + "step": 4845, + "time": 14.16 + }, + { + "epoch": 4.66, + "learning_rate": "1.0144e-04", + "loss": 0.5672, + "slid_loss": 0.5366, + "step": 4846, + "time": 11.48 + }, + { + "epoch": 4.66, + 
"learning_rate": "1.0143e-04", + "loss": 0.5001, + "slid_loss": 0.5365, + "step": 4847, + "time": 13.91 + }, + { + "epoch": 4.66, + "learning_rate": "1.0143e-04", + "loss": 0.4096, + "slid_loss": 0.5351, + "step": 4848, + "time": 13.3 + }, + { + "epoch": 4.66, + "learning_rate": "1.0142e-04", + "loss": 0.5526, + "slid_loss": 0.5353, + "step": 4849, + "time": 13.33 + }, + { + "epoch": 4.66, + "learning_rate": "1.0141e-04", + "loss": 0.4809, + "slid_loss": 0.5347, + "step": 4850, + "time": 13.11 + }, + { + "epoch": 4.66, + "learning_rate": "1.0140e-04", + "loss": 0.5528, + "slid_loss": 0.5339, + "step": 4851, + "time": 13.84 + }, + { + "epoch": 4.66, + "learning_rate": "1.0139e-04", + "loss": 0.515, + "slid_loss": 0.5332, + "step": 4852, + "time": 13.72 + }, + { + "epoch": 4.66, + "learning_rate": "1.0139e-04", + "loss": 0.5603, + "slid_loss": 0.5325, + "step": 4853, + "time": 12.76 + }, + { + "epoch": 4.66, + "learning_rate": "1.0138e-04", + "loss": 0.5929, + "slid_loss": 0.5323, + "step": 4854, + "time": 13.49 + }, + { + "epoch": 4.66, + "learning_rate": "1.0137e-04", + "loss": 0.5485, + "slid_loss": 0.5325, + "step": 4855, + "time": 13.5 + }, + { + "epoch": 4.66, + "learning_rate": "1.0136e-04", + "loss": 0.4972, + "slid_loss": 0.5322, + "step": 4856, + "time": 12.22 + }, + { + "epoch": 4.67, + "learning_rate": "1.0136e-04", + "loss": 0.4847, + "slid_loss": 0.5319, + "step": 4857, + "time": 13.96 + }, + { + "epoch": 4.67, + "learning_rate": "1.0135e-04", + "loss": 0.514, + "slid_loss": 0.5323, + "step": 4858, + "time": 12.94 + }, + { + "epoch": 4.67, + "learning_rate": "1.0134e-04", + "loss": 0.571, + "slid_loss": 0.5335, + "step": 4859, + "time": 11.86 + }, + { + "epoch": 4.67, + "learning_rate": "1.0133e-04", + "loss": 0.4894, + "slid_loss": 0.5335, + "step": 4860, + "time": 12.24 + }, + { + "epoch": 4.67, + "learning_rate": "1.0132e-04", + "loss": 0.4898, + "slid_loss": 0.5332, + "step": 4861, + "time": 13.27 + }, + { + "epoch": 4.67, + "learning_rate": 
"1.0132e-04", + "loss": 0.5819, + "slid_loss": 0.5334, + "step": 4862, + "time": 13.35 + }, + { + "epoch": 4.67, + "learning_rate": "1.0131e-04", + "loss": 0.5666, + "slid_loss": 0.5338, + "step": 4863, + "time": 13.4 + }, + { + "epoch": 4.67, + "learning_rate": "1.0130e-04", + "loss": 0.4971, + "slid_loss": 0.5337, + "step": 4864, + "time": 13.56 + }, + { + "epoch": 4.67, + "learning_rate": "1.0129e-04", + "loss": 0.5411, + "slid_loss": 0.5333, + "step": 4865, + "time": 12.97 + }, + { + "epoch": 4.67, + "learning_rate": "1.0129e-04", + "loss": 0.5699, + "slid_loss": 0.5334, + "step": 4866, + "time": 12.42 + }, + { + "epoch": 4.68, + "learning_rate": "1.0128e-04", + "loss": 0.5822, + "slid_loss": 0.5334, + "step": 4867, + "time": 13.66 + }, + { + "epoch": 4.68, + "learning_rate": "1.0127e-04", + "loss": 0.5203, + "slid_loss": 0.5325, + "step": 4868, + "time": 11.11 + }, + { + "epoch": 4.68, + "learning_rate": "1.0126e-04", + "loss": 0.5954, + "slid_loss": 0.5336, + "step": 4869, + "time": 12.32 + }, + { + "epoch": 4.68, + "learning_rate": "1.0126e-04", + "loss": 0.5122, + "slid_loss": 0.5334, + "step": 4870, + "time": 14.23 + }, + { + "epoch": 4.68, + "learning_rate": "1.0125e-04", + "loss": 0.5229, + "slid_loss": 0.5331, + "step": 4871, + "time": 13.52 + }, + { + "epoch": 4.68, + "learning_rate": "1.0124e-04", + "loss": 0.4339, + "slid_loss": 0.5319, + "step": 4872, + "time": 12.25 + }, + { + "epoch": 4.68, + "learning_rate": "1.0123e-04", + "loss": 0.5581, + "slid_loss": 0.5325, + "step": 4873, + "time": 13.67 + }, + { + "epoch": 4.68, + "learning_rate": "1.0123e-04", + "loss": 0.5138, + "slid_loss": 0.5323, + "step": 4874, + "time": 11.73 + }, + { + "epoch": 4.68, + "learning_rate": "1.0122e-04", + "loss": 0.5314, + "slid_loss": 0.5324, + "step": 4875, + "time": 13.42 + }, + { + "epoch": 4.68, + "learning_rate": "1.0121e-04", + "loss": 0.5048, + "slid_loss": 0.5319, + "step": 4876, + "time": 13.28 + }, + { + "epoch": 4.68, + "learning_rate": "1.0121e-04", + 
"loss": 0.5344, + "slid_loss": 0.5325, + "step": 4877, + "time": 13.32 + }, + { + "epoch": 4.69, + "learning_rate": "1.0120e-04", + "loss": 0.5232, + "slid_loss": 0.5324, + "step": 4878, + "time": 12.56 + }, + { + "epoch": 4.69, + "learning_rate": "1.0119e-04", + "loss": 0.5407, + "slid_loss": 0.5318, + "step": 4879, + "time": 11.73 + }, + { + "epoch": 4.69, + "learning_rate": "1.0118e-04", + "loss": 0.5886, + "slid_loss": 0.533, + "step": 4880, + "time": 13.38 + }, + { + "epoch": 4.69, + "learning_rate": "1.0118e-04", + "loss": 0.5512, + "slid_loss": 0.5333, + "step": 4881, + "time": 13.77 + }, + { + "epoch": 4.69, + "learning_rate": "1.0117e-04", + "loss": 0.5039, + "slid_loss": 0.5331, + "step": 4882, + "time": 12.24 + }, + { + "epoch": 4.69, + "learning_rate": "1.0116e-04", + "loss": 0.512, + "slid_loss": 0.5328, + "step": 4883, + "time": 13.28 + }, + { + "epoch": 4.69, + "learning_rate": "1.0115e-04", + "loss": 0.4934, + "slid_loss": 0.5323, + "step": 4884, + "time": 13.69 + }, + { + "epoch": 4.69, + "learning_rate": "1.0115e-04", + "loss": 0.6205, + "slid_loss": 0.5329, + "step": 4885, + "time": 12.69 + }, + { + "epoch": 4.69, + "learning_rate": "1.0114e-04", + "loss": 0.4623, + "slid_loss": 0.5327, + "step": 4886, + "time": 11.34 + }, + { + "epoch": 4.69, + "learning_rate": "1.0113e-04", + "loss": 0.55, + "slid_loss": 0.5333, + "step": 4887, + "time": 14.09 + }, + { + "epoch": 4.7, + "learning_rate": "1.0113e-04", + "loss": 0.5231, + "slid_loss": 0.5329, + "step": 4888, + "time": 13.74 + }, + { + "epoch": 4.7, + "learning_rate": "1.0112e-04", + "loss": 0.5807, + "slid_loss": 0.5341, + "step": 4889, + "time": 12.74 + }, + { + "epoch": 4.7, + "learning_rate": "1.0111e-04", + "loss": 0.5082, + "slid_loss": 0.5344, + "step": 4890, + "time": 12.84 + }, + { + "epoch": 4.7, + "learning_rate": "1.0110e-04", + "loss": 0.5107, + "slid_loss": 0.5341, + "step": 4891, + "time": 11.39 + }, + { + "epoch": 4.7, + "learning_rate": "1.0110e-04", + "loss": 0.5093, + 
"slid_loss": 0.5333, + "step": 4892, + "time": 13.93 + }, + { + "epoch": 4.7, + "learning_rate": "1.0109e-04", + "loss": 0.5792, + "slid_loss": 0.5335, + "step": 4893, + "time": 12.84 + }, + { + "epoch": 4.7, + "learning_rate": "1.0108e-04", + "loss": 0.5227, + "slid_loss": 0.5338, + "step": 4894, + "time": 13.51 + }, + { + "epoch": 4.7, + "learning_rate": "1.0108e-04", + "loss": 0.5081, + "slid_loss": 0.5329, + "step": 4895, + "time": 13.11 + }, + { + "epoch": 4.7, + "learning_rate": "1.0107e-04", + "loss": 0.5707, + "slid_loss": 0.5342, + "step": 4896, + "time": 13.45 + }, + { + "epoch": 4.7, + "learning_rate": "1.0106e-04", + "loss": 0.5116, + "slid_loss": 0.5335, + "step": 4897, + "time": 12.84 + }, + { + "epoch": 4.71, + "learning_rate": "1.0106e-04", + "loss": 0.5792, + "slid_loss": 0.5338, + "step": 4898, + "time": 13.76 + }, + { + "epoch": 4.71, + "learning_rate": "1.0105e-04", + "loss": 0.5284, + "slid_loss": 0.5335, + "step": 4899, + "time": 11.69 + }, + { + "epoch": 4.71, + "learning_rate": "1.0104e-04", + "loss": 0.4811, + "slid_loss": 0.5327, + "step": 4900, + "time": 13.39 + }, + { + "epoch": 4.71, + "learning_rate": "1.0104e-04", + "loss": 0.5504, + "slid_loss": 0.5328, + "step": 4901, + "time": 13.78 + }, + { + "epoch": 4.71, + "learning_rate": "1.0103e-04", + "loss": 0.5991, + "slid_loss": 0.5335, + "step": 4902, + "time": 13.41 + }, + { + "epoch": 4.71, + "learning_rate": "1.0102e-04", + "loss": 0.5009, + "slid_loss": 0.5337, + "step": 4903, + "time": 12.93 + }, + { + "epoch": 4.71, + "learning_rate": "1.0102e-04", + "loss": 0.5484, + "slid_loss": 0.5337, + "step": 4904, + "time": 13.36 + }, + { + "epoch": 4.71, + "learning_rate": "1.0101e-04", + "loss": 0.5095, + "slid_loss": 0.534, + "step": 4905, + "time": 12.86 + }, + { + "epoch": 4.71, + "learning_rate": "1.0100e-04", + "loss": 0.4432, + "slid_loss": 0.5336, + "step": 4906, + "time": 13.21 + }, + { + "epoch": 4.71, + "learning_rate": "1.0100e-04", + "loss": 0.549, + "slid_loss": 0.5335, + 
"step": 4907, + "time": 13.43 + }, + { + "epoch": 4.71, + "learning_rate": "1.0099e-04", + "loss": 0.5684, + "slid_loss": 0.5339, + "step": 4908, + "time": 13.35 + }, + { + "epoch": 4.72, + "learning_rate": "1.0098e-04", + "loss": 0.5753, + "slid_loss": 0.5342, + "step": 4909, + "time": 13.7 + }, + { + "epoch": 4.72, + "learning_rate": "1.0098e-04", + "loss": 0.4767, + "slid_loss": 0.534, + "step": 4910, + "time": 14.32 + }, + { + "epoch": 4.72, + "learning_rate": "1.0097e-04", + "loss": 0.5007, + "slid_loss": 0.5343, + "step": 4911, + "time": 13.72 + }, + { + "epoch": 4.72, + "learning_rate": "1.0096e-04", + "loss": 0.5482, + "slid_loss": 0.5351, + "step": 4912, + "time": 12.89 + }, + { + "epoch": 4.72, + "learning_rate": "1.0096e-04", + "loss": 0.5575, + "slid_loss": 0.5353, + "step": 4913, + "time": 13.76 + }, + { + "epoch": 4.72, + "learning_rate": "1.0095e-04", + "loss": 0.5232, + "slid_loss": 0.5349, + "step": 4914, + "time": 13.28 + }, + { + "epoch": 4.72, + "learning_rate": "1.0094e-04", + "loss": 0.4772, + "slid_loss": 0.534, + "step": 4915, + "time": 13.11 + }, + { + "epoch": 4.72, + "learning_rate": "1.0094e-04", + "loss": 0.537, + "slid_loss": 0.5336, + "step": 4916, + "time": 13.13 + }, + { + "epoch": 4.72, + "learning_rate": "1.0093e-04", + "loss": 0.4641, + "slid_loss": 0.5336, + "step": 4917, + "time": 13.95 + }, + { + "epoch": 4.72, + "learning_rate": "1.0092e-04", + "loss": 0.5912, + "slid_loss": 0.5335, + "step": 4918, + "time": 13.0 + }, + { + "epoch": 4.73, + "learning_rate": "1.0092e-04", + "loss": 0.5061, + "slid_loss": 0.5331, + "step": 4919, + "time": 13.0 + }, + { + "epoch": 4.73, + "learning_rate": "1.0091e-04", + "loss": 0.6186, + "slid_loss": 0.5339, + "step": 4920, + "time": 13.18 + }, + { + "epoch": 4.73, + "learning_rate": "1.0090e-04", + "loss": 0.4853, + "slid_loss": 0.5323, + "step": 4921, + "time": 13.2 + }, + { + "epoch": 4.73, + "learning_rate": "1.0090e-04", + "loss": 0.5163, + "slid_loss": 0.5321, + "step": 4922, + "time": 
13.03 + }, + { + "epoch": 4.73, + "learning_rate": "1.0089e-04", + "loss": 0.5821, + "slid_loss": 0.5322, + "step": 4923, + "time": 13.26 + }, + { + "epoch": 4.73, + "learning_rate": "1.0089e-04", + "loss": 0.5226, + "slid_loss": 0.5322, + "step": 4924, + "time": 11.58 + }, + { + "epoch": 4.73, + "learning_rate": "1.0088e-04", + "loss": 0.5353, + "slid_loss": 0.5315, + "step": 4925, + "time": 13.31 + }, + { + "epoch": 4.73, + "learning_rate": "1.0087e-04", + "loss": 0.5967, + "slid_loss": 0.532, + "step": 4926, + "time": 12.73 + }, + { + "epoch": 4.73, + "learning_rate": "1.0087e-04", + "loss": 0.5595, + "slid_loss": 0.5322, + "step": 4927, + "time": 12.78 + }, + { + "epoch": 4.73, + "learning_rate": "1.0086e-04", + "loss": 0.4512, + "slid_loss": 0.5322, + "step": 4928, + "time": 11.97 + }, + { + "epoch": 4.73, + "learning_rate": "1.0085e-04", + "loss": 0.5241, + "slid_loss": 0.5325, + "step": 4929, + "time": 13.11 + }, + { + "epoch": 4.74, + "learning_rate": "1.0085e-04", + "loss": 0.4569, + "slid_loss": 0.5313, + "step": 4930, + "time": 12.3 + }, + { + "epoch": 4.74, + "learning_rate": "1.0084e-04", + "loss": 0.5759, + "slid_loss": 0.5316, + "step": 4931, + "time": 13.29 + }, + { + "epoch": 4.74, + "learning_rate": "1.0084e-04", + "loss": 0.5114, + "slid_loss": 0.5305, + "step": 4932, + "time": 12.68 + }, + { + "epoch": 4.74, + "learning_rate": "1.0083e-04", + "loss": 0.4793, + "slid_loss": 0.53, + "step": 4933, + "time": 13.24 + }, + { + "epoch": 4.74, + "learning_rate": "1.0082e-04", + "loss": 0.5702, + "slid_loss": 0.53, + "step": 4934, + "time": 13.43 + }, + { + "epoch": 4.74, + "learning_rate": "1.0082e-04", + "loss": 0.4865, + "slid_loss": 0.5295, + "step": 4935, + "time": 13.84 + }, + { + "epoch": 4.74, + "learning_rate": "1.0081e-04", + "loss": 0.5044, + "slid_loss": 0.5293, + "step": 4936, + "time": 13.33 + }, + { + "epoch": 4.74, + "learning_rate": "1.0081e-04", + "loss": 0.594, + "slid_loss": 0.5304, + "step": 4937, + "time": 11.58 + }, + { + "epoch": 
4.74, + "learning_rate": "1.0080e-04", + "loss": 0.5227, + "slid_loss": 0.5307, + "step": 4938, + "time": 14.01 + }, + { + "epoch": 4.74, + "learning_rate": "1.0079e-04", + "loss": 0.602, + "slid_loss": 0.5309, + "step": 4939, + "time": 12.47 + }, + { + "epoch": 4.75, + "learning_rate": "1.0079e-04", + "loss": 0.502, + "slid_loss": 0.5306, + "step": 4940, + "time": 12.3 + }, + { + "epoch": 4.75, + "learning_rate": "1.0078e-04", + "loss": 0.4923, + "slid_loss": 0.5297, + "step": 4941, + "time": 11.43 + }, + { + "epoch": 4.75, + "learning_rate": "1.0078e-04", + "loss": 0.4616, + "slid_loss": 0.5288, + "step": 4942, + "time": 13.39 + }, + { + "epoch": 4.75, + "learning_rate": "1.0077e-04", + "loss": 0.5903, + "slid_loss": 0.5304, + "step": 4943, + "time": 14.1 + }, + { + "epoch": 4.75, + "learning_rate": "1.0076e-04", + "loss": 0.4661, + "slid_loss": 0.5298, + "step": 4944, + "time": 12.82 + }, + { + "epoch": 4.75, + "learning_rate": "1.0076e-04", + "loss": 0.53, + "slid_loss": 0.5291, + "step": 4945, + "time": 14.18 + }, + { + "epoch": 4.75, + "learning_rate": "1.0075e-04", + "loss": 0.54, + "slid_loss": 0.5289, + "step": 4946, + "time": 13.48 + }, + { + "epoch": 4.75, + "learning_rate": "1.0075e-04", + "loss": 0.5015, + "slid_loss": 0.5289, + "step": 4947, + "time": 12.28 + }, + { + "epoch": 4.75, + "learning_rate": "1.0074e-04", + "loss": 0.4486, + "slid_loss": 0.5293, + "step": 4948, + "time": 13.25 + }, + { + "epoch": 4.75, + "learning_rate": "1.0074e-04", + "loss": 0.5545, + "slid_loss": 0.5293, + "step": 4949, + "time": 12.17 + }, + { + "epoch": 4.76, + "learning_rate": "1.0073e-04", + "loss": 0.5359, + "slid_loss": 0.5298, + "step": 4950, + "time": 10.97 + }, + { + "epoch": 4.76, + "learning_rate": "1.0072e-04", + "loss": 0.5002, + "slid_loss": 0.5293, + "step": 4951, + "time": 11.92 + }, + { + "epoch": 4.76, + "learning_rate": "1.0072e-04", + "loss": 0.5385, + "slid_loss": 0.5295, + "step": 4952, + "time": 13.02 + }, + { + "epoch": 4.76, + "learning_rate": 
"1.0071e-04", + "loss": 0.5919, + "slid_loss": 0.5299, + "step": 4953, + "time": 13.86 + }, + { + "epoch": 4.76, + "learning_rate": "1.0071e-04", + "loss": 0.5022, + "slid_loss": 0.529, + "step": 4954, + "time": 11.37 + }, + { + "epoch": 4.76, + "learning_rate": "1.0070e-04", + "loss": 0.5545, + "slid_loss": 0.529, + "step": 4955, + "time": 13.29 + }, + { + "epoch": 4.76, + "learning_rate": "1.0070e-04", + "loss": 0.4945, + "slid_loss": 0.529, + "step": 4956, + "time": 13.47 + }, + { + "epoch": 4.76, + "learning_rate": "1.0069e-04", + "loss": 0.5483, + "slid_loss": 0.5296, + "step": 4957, + "time": 14.25 + }, + { + "epoch": 4.76, + "learning_rate": "1.0068e-04", + "loss": 0.5457, + "slid_loss": 0.5299, + "step": 4958, + "time": 13.12 + }, + { + "epoch": 4.76, + "learning_rate": "1.0068e-04", + "loss": 0.521, + "slid_loss": 0.5294, + "step": 4959, + "time": 11.75 + }, + { + "epoch": 4.76, + "learning_rate": "1.0067e-04", + "loss": 0.4782, + "slid_loss": 0.5293, + "step": 4960, + "time": 11.94 + }, + { + "epoch": 4.77, + "learning_rate": "1.0067e-04", + "loss": 0.5723, + "slid_loss": 0.5302, + "step": 4961, + "time": 13.05 + }, + { + "epoch": 4.77, + "learning_rate": "1.0066e-04", + "loss": 0.4774, + "slid_loss": 0.5291, + "step": 4962, + "time": 14.26 + }, + { + "epoch": 4.77, + "learning_rate": "1.0066e-04", + "loss": 0.4538, + "slid_loss": 0.528, + "step": 4963, + "time": 12.5 + }, + { + "epoch": 4.77, + "learning_rate": "1.0065e-04", + "loss": 0.5827, + "slid_loss": 0.5288, + "step": 4964, + "time": 14.75 + }, + { + "epoch": 4.77, + "learning_rate": "1.0065e-04", + "loss": 0.548, + "slid_loss": 0.5289, + "step": 4965, + "time": 13.23 + }, + { + "epoch": 4.77, + "learning_rate": "1.0064e-04", + "loss": 0.5588, + "slid_loss": 0.5288, + "step": 4966, + "time": 11.47 + }, + { + "epoch": 4.77, + "learning_rate": "1.0064e-04", + "loss": 0.4564, + "slid_loss": 0.5275, + "step": 4967, + "time": 12.19 + }, + { + "epoch": 4.77, + "learning_rate": "1.0063e-04", + "loss": 
0.4801, + "slid_loss": 0.5271, + "step": 4968, + "time": 12.62 + }, + { + "epoch": 4.77, + "learning_rate": "1.0063e-04", + "loss": 0.5736, + "slid_loss": 0.5269, + "step": 4969, + "time": 12.25 + }, + { + "epoch": 4.77, + "learning_rate": "1.0062e-04", + "loss": 0.5115, + "slid_loss": 0.5269, + "step": 4970, + "time": 10.91 + }, + { + "epoch": 4.78, + "learning_rate": "1.0061e-04", + "loss": 0.4462, + "slid_loss": 0.5261, + "step": 4971, + "time": 12.88 + }, + { + "epoch": 4.78, + "learning_rate": "1.0061e-04", + "loss": 0.5051, + "slid_loss": 0.5269, + "step": 4972, + "time": 11.2 + }, + { + "epoch": 4.78, + "learning_rate": "1.0060e-04", + "loss": 0.506, + "slid_loss": 0.5263, + "step": 4973, + "time": 13.81 + }, + { + "epoch": 4.78, + "learning_rate": "1.0060e-04", + "loss": 0.5347, + "slid_loss": 0.5265, + "step": 4974, + "time": 12.94 + }, + { + "epoch": 4.78, + "learning_rate": "1.0059e-04", + "loss": 0.5731, + "slid_loss": 0.527, + "step": 4975, + "time": 13.23 + }, + { + "epoch": 4.78, + "learning_rate": "1.0059e-04", + "loss": 0.5489, + "slid_loss": 0.5274, + "step": 4976, + "time": 13.95 + }, + { + "epoch": 4.78, + "learning_rate": "1.0058e-04", + "loss": 0.5049, + "slid_loss": 0.5271, + "step": 4977, + "time": 13.64 + }, + { + "epoch": 4.78, + "learning_rate": "1.0058e-04", + "loss": 0.4651, + "slid_loss": 0.5265, + "step": 4978, + "time": 12.82 + }, + { + "epoch": 4.78, + "learning_rate": "1.0057e-04", + "loss": 0.4791, + "slid_loss": 0.5259, + "step": 4979, + "time": 13.71 + }, + { + "epoch": 4.78, + "learning_rate": "1.0057e-04", + "loss": 0.5595, + "slid_loss": 0.5256, + "step": 4980, + "time": 11.62 + }, + { + "epoch": 4.78, + "learning_rate": "1.0056e-04", + "loss": 0.4834, + "slid_loss": 0.5249, + "step": 4981, + "time": 13.16 + }, + { + "epoch": 4.79, + "learning_rate": "1.0056e-04", + "loss": 0.5057, + "slid_loss": 0.525, + "step": 4982, + "time": 14.6 + }, + { + "epoch": 4.79, + "learning_rate": "1.0055e-04", + "loss": 0.508, + "slid_loss": 
0.5249, + "step": 4983, + "time": 11.67 + }, + { + "epoch": 4.79, + "learning_rate": "1.0055e-04", + "loss": 0.5353, + "slid_loss": 0.5253, + "step": 4984, + "time": 13.41 + }, + { + "epoch": 4.79, + "learning_rate": "1.0054e-04", + "loss": 0.5552, + "slid_loss": 0.5247, + "step": 4985, + "time": 11.68 + }, + { + "epoch": 4.79, + "learning_rate": "1.0054e-04", + "loss": 0.5229, + "slid_loss": 0.5253, + "step": 4986, + "time": 13.08 + }, + { + "epoch": 4.79, + "learning_rate": "1.0053e-04", + "loss": 0.442, + "slid_loss": 0.5242, + "step": 4987, + "time": 12.49 + }, + { + "epoch": 4.79, + "learning_rate": "1.0053e-04", + "loss": 0.4588, + "slid_loss": 0.5236, + "step": 4988, + "time": 12.28 + }, + { + "epoch": 4.79, + "learning_rate": "1.0052e-04", + "loss": 0.5047, + "slid_loss": 0.5228, + "step": 4989, + "time": 13.64 + }, + { + "epoch": 4.79, + "learning_rate": "1.0052e-04", + "loss": 0.42, + "slid_loss": 0.5219, + "step": 4990, + "time": 11.25 + }, + { + "epoch": 4.79, + "learning_rate": "1.0051e-04", + "loss": 0.4105, + "slid_loss": 0.5209, + "step": 4991, + "time": 14.5 + }, + { + "epoch": 4.8, + "learning_rate": "1.0051e-04", + "loss": 0.5074, + "slid_loss": 0.5209, + "step": 4992, + "time": 11.67 + }, + { + "epoch": 4.8, + "learning_rate": "1.0050e-04", + "loss": 0.5861, + "slid_loss": 0.521, + "step": 4993, + "time": 13.23 + }, + { + "epoch": 4.8, + "learning_rate": "1.0050e-04", + "loss": 0.5762, + "slid_loss": 0.5215, + "step": 4994, + "time": 14.38 + }, + { + "epoch": 4.8, + "learning_rate": "1.0050e-04", + "loss": 0.492, + "slid_loss": 0.5214, + "step": 4995, + "time": 13.34 + }, + { + "epoch": 4.8, + "learning_rate": "1.0049e-04", + "loss": 0.5323, + "slid_loss": 0.521, + "step": 4996, + "time": 13.97 + }, + { + "epoch": 4.8, + "learning_rate": "1.0049e-04", + "loss": 0.4007, + "slid_loss": 0.5199, + "step": 4997, + "time": 13.97 + }, + { + "epoch": 4.8, + "learning_rate": "1.0048e-04", + "loss": 0.4999, + "slid_loss": 0.5191, + "step": 4998, + "time": 
13.34 + }, + { + "epoch": 4.8, + "learning_rate": "1.0048e-04", + "loss": 0.5152, + "slid_loss": 0.5189, + "step": 4999, + "time": 13.3 + }, + { + "epoch": 4.8, + "learning_rate": "1.0047e-04", + "loss": 0.4752, + "slid_loss": 0.5189, + "step": 5000, + "time": 13.97 + }, + { + "epoch": 4.8, + "learning_rate": "1.0047e-04", + "loss": 0.52, + "slid_loss": 0.5186, + "step": 5001, + "time": 12.14 + }, + { + "epoch": 4.8, + "learning_rate": "1.0046e-04", + "loss": 0.4685, + "slid_loss": 0.5173, + "step": 5002, + "time": 13.36 + }, + { + "epoch": 4.81, + "learning_rate": "1.0046e-04", + "loss": 0.4753, + "slid_loss": 0.517, + "step": 5003, + "time": 13.79 + }, + { + "epoch": 4.81, + "learning_rate": "1.0045e-04", + "loss": 0.5331, + "slid_loss": 0.5169, + "step": 5004, + "time": 13.6 + }, + { + "epoch": 4.81, + "learning_rate": "1.0045e-04", + "loss": 0.538, + "slid_loss": 0.5171, + "step": 5005, + "time": 13.68 + }, + { + "epoch": 4.81, + "learning_rate": "1.0044e-04", + "loss": 0.4631, + "slid_loss": 0.5173, + "step": 5006, + "time": 11.33 + }, + { + "epoch": 4.81, + "learning_rate": "1.0044e-04", + "loss": 0.4972, + "slid_loss": 0.5168, + "step": 5007, + "time": 14.04 + }, + { + "epoch": 4.81, + "learning_rate": "1.0044e-04", + "loss": 0.5314, + "slid_loss": 0.5165, + "step": 5008, + "time": 13.69 + }, + { + "epoch": 4.81, + "learning_rate": "1.0043e-04", + "loss": 0.5263, + "slid_loss": 0.516, + "step": 5009, + "time": 13.11 + }, + { + "epoch": 4.81, + "learning_rate": "1.0043e-04", + "loss": 0.5838, + "slid_loss": 0.517, + "step": 5010, + "time": 12.96 + }, + { + "epoch": 4.81, + "learning_rate": "1.0042e-04", + "loss": 0.4748, + "slid_loss": 0.5168, + "step": 5011, + "time": 14.06 + }, + { + "epoch": 4.81, + "learning_rate": "1.0042e-04", + "loss": 0.6314, + "slid_loss": 0.5176, + "step": 5012, + "time": 14.53 + }, + { + "epoch": 4.82, + "learning_rate": "1.0041e-04", + "loss": 0.4864, + "slid_loss": 0.5169, + "step": 5013, + "time": 12.7 + }, + { + "epoch": 4.82, 
+ "learning_rate": "1.0041e-04", + "loss": 0.5547, + "slid_loss": 0.5172, + "step": 5014, + "time": 12.79 + }, + { + "epoch": 4.82, + "learning_rate": "1.0041e-04", + "loss": 0.5734, + "slid_loss": 0.5182, + "step": 5015, + "time": 11.16 + }, + { + "epoch": 4.82, + "learning_rate": "1.0040e-04", + "loss": 0.459, + "slid_loss": 0.5174, + "step": 5016, + "time": 12.83 + }, + { + "epoch": 4.82, + "learning_rate": "1.0040e-04", + "loss": 0.5851, + "slid_loss": 0.5186, + "step": 5017, + "time": 13.18 + }, + { + "epoch": 4.82, + "learning_rate": "1.0039e-04", + "loss": 0.5792, + "slid_loss": 0.5185, + "step": 5018, + "time": 13.14 + }, + { + "epoch": 4.82, + "learning_rate": "1.0039e-04", + "loss": 0.5419, + "slid_loss": 0.5188, + "step": 5019, + "time": 12.4 + }, + { + "epoch": 4.82, + "learning_rate": "1.0038e-04", + "loss": 0.4976, + "slid_loss": 0.5176, + "step": 5020, + "time": 11.58 + }, + { + "epoch": 4.82, + "learning_rate": "1.0038e-04", + "loss": 0.4912, + "slid_loss": 0.5177, + "step": 5021, + "time": 12.3 + }, + { + "epoch": 4.82, + "learning_rate": "1.0038e-04", + "loss": 0.4912, + "slid_loss": 0.5174, + "step": 5022, + "time": 14.24 + }, + { + "epoch": 4.83, + "learning_rate": "1.0037e-04", + "loss": 0.4749, + "slid_loss": 0.5164, + "step": 5023, + "time": 13.93 + }, + { + "epoch": 4.83, + "learning_rate": "1.0037e-04", + "loss": 0.5367, + "slid_loss": 0.5165, + "step": 5024, + "time": 13.37 + }, + { + "epoch": 4.83, + "learning_rate": "1.0036e-04", + "loss": 0.4902, + "slid_loss": 0.5161, + "step": 5025, + "time": 12.87 + }, + { + "epoch": 4.83, + "learning_rate": "1.0036e-04", + "loss": 0.5039, + "slid_loss": 0.5151, + "step": 5026, + "time": 11.89 + }, + { + "epoch": 4.83, + "learning_rate": "1.0036e-04", + "loss": 0.5113, + "slid_loss": 0.5147, + "step": 5027, + "time": 10.8 + }, + { + "epoch": 4.83, + "learning_rate": "1.0035e-04", + "loss": 0.4677, + "slid_loss": 0.5148, + "step": 5028, + "time": 13.5 + }, + { + "epoch": 4.83, + "learning_rate": 
"1.0035e-04", + "loss": 0.5348, + "slid_loss": 0.5149, + "step": 5029, + "time": 12.88 + }, + { + "epoch": 4.83, + "learning_rate": "1.0034e-04", + "loss": 0.4847, + "slid_loss": 0.5152, + "step": 5030, + "time": 11.0 + }, + { + "epoch": 4.83, + "learning_rate": "1.0034e-04", + "loss": 0.4739, + "slid_loss": 0.5142, + "step": 5031, + "time": 12.03 + }, + { + "epoch": 4.83, + "learning_rate": "1.0034e-04", + "loss": 0.5386, + "slid_loss": 0.5145, + "step": 5032, + "time": 13.71 + }, + { + "epoch": 4.83, + "learning_rate": "1.0033e-04", + "loss": 0.5103, + "slid_loss": 0.5148, + "step": 5033, + "time": 13.73 + }, + { + "epoch": 4.84, + "learning_rate": "1.0033e-04", + "loss": 0.5369, + "slid_loss": 0.5144, + "step": 5034, + "time": 13.48 + }, + { + "epoch": 4.84, + "learning_rate": "1.0032e-04", + "loss": 0.482, + "slid_loss": 0.5144, + "step": 5035, + "time": 13.77 + }, + { + "epoch": 4.84, + "learning_rate": "1.0032e-04", + "loss": 0.5149, + "slid_loss": 0.5145, + "step": 5036, + "time": 13.42 + }, + { + "epoch": 4.84, + "learning_rate": "1.0032e-04", + "loss": 0.4198, + "slid_loss": 0.5127, + "step": 5037, + "time": 13.95 + }, + { + "epoch": 4.84, + "learning_rate": "1.0031e-04", + "loss": 0.4854, + "slid_loss": 0.5124, + "step": 5038, + "time": 14.0 + }, + { + "epoch": 4.84, + "learning_rate": "1.0031e-04", + "loss": 0.5228, + "slid_loss": 0.5116, + "step": 5039, + "time": 13.71 + }, + { + "epoch": 4.84, + "learning_rate": "1.0031e-04", + "loss": 0.4927, + "slid_loss": 0.5115, + "step": 5040, + "time": 11.13 + }, + { + "epoch": 4.84, + "learning_rate": "1.0030e-04", + "loss": 0.4716, + "slid_loss": 0.5113, + "step": 5041, + "time": 11.68 + }, + { + "epoch": 4.84, + "learning_rate": "1.0030e-04", + "loss": 0.4168, + "slid_loss": 0.5108, + "step": 5042, + "time": 13.8 + }, + { + "epoch": 4.84, + "learning_rate": "1.0029e-04", + "loss": 0.4384, + "slid_loss": 0.5093, + "step": 5043, + "time": 13.69 + }, + { + "epoch": 4.85, + "learning_rate": "1.0029e-04", + "loss": 
0.4708, + "slid_loss": 0.5094, + "step": 5044, + "time": 13.0 + }, + { + "epoch": 4.85, + "learning_rate": "1.0029e-04", + "loss": 0.6018, + "slid_loss": 0.5101, + "step": 5045, + "time": 13.72 + }, + { + "epoch": 4.85, + "learning_rate": "1.0028e-04", + "loss": 0.4657, + "slid_loss": 0.5093, + "step": 5046, + "time": 13.71 + }, + { + "epoch": 4.85, + "learning_rate": "1.0028e-04", + "loss": 0.6058, + "slid_loss": 0.5104, + "step": 5047, + "time": 13.7 + }, + { + "epoch": 4.85, + "learning_rate": "1.0028e-04", + "loss": 0.5254, + "slid_loss": 0.5112, + "step": 5048, + "time": 13.37 + }, + { + "epoch": 4.85, + "learning_rate": "1.0027e-04", + "loss": 0.477, + "slid_loss": 0.5104, + "step": 5049, + "time": 12.24 + }, + { + "epoch": 4.85, + "learning_rate": "1.0027e-04", + "loss": 0.5665, + "slid_loss": 0.5107, + "step": 5050, + "time": 13.77 + }, + { + "epoch": 4.85, + "learning_rate": "1.0027e-04", + "loss": 0.4572, + "slid_loss": 0.5103, + "step": 5051, + "time": 13.0 + }, + { + "epoch": 4.85, + "learning_rate": "1.0026e-04", + "loss": 0.5233, + "slid_loss": 0.5101, + "step": 5052, + "time": 13.28 + }, + { + "epoch": 4.85, + "learning_rate": "1.0026e-04", + "loss": 0.5242, + "slid_loss": 0.5094, + "step": 5053, + "time": 13.53 + }, + { + "epoch": 4.85, + "learning_rate": "1.0026e-04", + "loss": 0.5386, + "slid_loss": 0.5098, + "step": 5054, + "time": 13.82 + }, + { + "epoch": 4.86, + "learning_rate": "1.0025e-04", + "loss": 0.5242, + "slid_loss": 0.5095, + "step": 5055, + "time": 13.34 + }, + { + "epoch": 4.86, + "learning_rate": "1.0025e-04", + "loss": 0.521, + "slid_loss": 0.5097, + "step": 5056, + "time": 12.53 + }, + { + "epoch": 4.86, + "learning_rate": "1.0025e-04", + "loss": 0.4955, + "slid_loss": 0.5092, + "step": 5057, + "time": 12.58 + }, + { + "epoch": 4.86, + "learning_rate": "1.0024e-04", + "loss": 0.5511, + "slid_loss": 0.5093, + "step": 5058, + "time": 12.93 + }, + { + "epoch": 4.86, + "learning_rate": "1.0024e-04", + "loss": 0.5867, + "slid_loss": 
0.5099, + "step": 5059, + "time": 13.82 + }, + { + "epoch": 4.86, + "learning_rate": "1.0024e-04", + "loss": 0.4963, + "slid_loss": 0.5101, + "step": 5060, + "time": 11.52 + }, + { + "epoch": 4.86, + "learning_rate": "1.0023e-04", + "loss": 0.5654, + "slid_loss": 0.51, + "step": 5061, + "time": 13.55 + }, + { + "epoch": 4.86, + "learning_rate": "1.0023e-04", + "loss": 0.5072, + "slid_loss": 0.5103, + "step": 5062, + "time": 13.13 + }, + { + "epoch": 4.86, + "learning_rate": "1.0023e-04", + "loss": 0.508, + "slid_loss": 0.5109, + "step": 5063, + "time": 12.91 + }, + { + "epoch": 4.86, + "learning_rate": "1.0022e-04", + "loss": 0.4966, + "slid_loss": 0.51, + "step": 5064, + "time": 12.93 + }, + { + "epoch": 4.87, + "learning_rate": "1.0022e-04", + "loss": 0.5723, + "slid_loss": 0.5103, + "step": 5065, + "time": 13.53 + }, + { + "epoch": 4.87, + "learning_rate": "1.0022e-04", + "loss": 0.4937, + "slid_loss": 0.5096, + "step": 5066, + "time": 12.23 + }, + { + "epoch": 4.87, + "learning_rate": "1.0021e-04", + "loss": 0.5286, + "slid_loss": 0.5103, + "step": 5067, + "time": 11.82 + }, + { + "epoch": 4.87, + "learning_rate": "1.0021e-04", + "loss": 0.496, + "slid_loss": 0.5105, + "step": 5068, + "time": 12.93 + }, + { + "epoch": 4.87, + "learning_rate": "1.0021e-04", + "loss": 0.6164, + "slid_loss": 0.5109, + "step": 5069, + "time": 14.18 + }, + { + "epoch": 4.87, + "learning_rate": "1.0020e-04", + "loss": 0.4751, + "slid_loss": 0.5106, + "step": 5070, + "time": 12.47 + }, + { + "epoch": 4.87, + "learning_rate": "1.0020e-04", + "loss": 0.4714, + "slid_loss": 0.5108, + "step": 5071, + "time": 11.37 + }, + { + "epoch": 4.87, + "learning_rate": "1.0020e-04", + "loss": 0.5086, + "slid_loss": 0.5108, + "step": 5072, + "time": 13.06 + }, + { + "epoch": 4.87, + "learning_rate": "1.0020e-04", + "loss": 0.5588, + "slid_loss": 0.5114, + "step": 5073, + "time": 13.89 + }, + { + "epoch": 4.87, + "learning_rate": "1.0019e-04", + "loss": 0.4988, + "slid_loss": 0.511, + "step": 5074, + 
"time": 13.58 + }, + { + "epoch": 4.88, + "learning_rate": "1.0019e-04", + "loss": 0.5026, + "slid_loss": 0.5103, + "step": 5075, + "time": 11.67 + }, + { + "epoch": 4.88, + "learning_rate": "1.0019e-04", + "loss": 0.5332, + "slid_loss": 0.5102, + "step": 5076, + "time": 13.48 + }, + { + "epoch": 4.88, + "learning_rate": "1.0018e-04", + "loss": 0.5369, + "slid_loss": 0.5105, + "step": 5077, + "time": 12.31 + }, + { + "epoch": 4.88, + "learning_rate": "1.0018e-04", + "loss": 0.5436, + "slid_loss": 0.5113, + "step": 5078, + "time": 13.66 + }, + { + "epoch": 4.88, + "learning_rate": "1.0018e-04", + "loss": 0.4862, + "slid_loss": 0.5113, + "step": 5079, + "time": 13.6 + }, + { + "epoch": 4.88, + "learning_rate": "1.0018e-04", + "loss": 0.4378, + "slid_loss": 0.5101, + "step": 5080, + "time": 11.28 + }, + { + "epoch": 4.88, + "learning_rate": "1.0017e-04", + "loss": 0.5362, + "slid_loss": 0.5106, + "step": 5081, + "time": 13.74 + }, + { + "epoch": 4.88, + "learning_rate": "1.0017e-04", + "loss": 0.5019, + "slid_loss": 0.5106, + "step": 5082, + "time": 11.54 + }, + { + "epoch": 4.88, + "learning_rate": "1.0017e-04", + "loss": 0.5193, + "slid_loss": 0.5107, + "step": 5083, + "time": 14.52 + }, + { + "epoch": 4.88, + "learning_rate": "1.0016e-04", + "loss": 0.5521, + "slid_loss": 0.5109, + "step": 5084, + "time": 11.84 + }, + { + "epoch": 4.88, + "learning_rate": "1.0016e-04", + "loss": 0.5416, + "slid_loss": 0.5107, + "step": 5085, + "time": 13.52 + }, + { + "epoch": 4.89, + "learning_rate": "1.0016e-04", + "loss": 0.4548, + "slid_loss": 0.5101, + "step": 5086, + "time": 13.86 + }, + { + "epoch": 4.89, + "learning_rate": "1.0016e-04", + "loss": 0.5053, + "slid_loss": 0.5107, + "step": 5087, + "time": 11.37 + }, + { + "epoch": 4.89, + "learning_rate": "1.0015e-04", + "loss": 0.5044, + "slid_loss": 0.5112, + "step": 5088, + "time": 13.49 + }, + { + "epoch": 4.89, + "learning_rate": "1.0015e-04", + "loss": 0.5102, + "slid_loss": 0.5112, + "step": 5089, + "time": 13.39 + }, + 
{ + "epoch": 4.89, + "learning_rate": "1.0015e-04", + "loss": 0.484, + "slid_loss": 0.5118, + "step": 5090, + "time": 12.74 + }, + { + "epoch": 4.89, + "learning_rate": "1.0015e-04", + "loss": 0.4358, + "slid_loss": 0.5121, + "step": 5091, + "time": 11.93 + }, + { + "epoch": 4.89, + "learning_rate": "1.0014e-04", + "loss": 0.4838, + "slid_loss": 0.5119, + "step": 5092, + "time": 13.43 + }, + { + "epoch": 4.89, + "learning_rate": "1.0014e-04", + "loss": 0.5536, + "slid_loss": 0.5115, + "step": 5093, + "time": 13.14 + }, + { + "epoch": 4.89, + "learning_rate": "1.0014e-04", + "loss": 0.5303, + "slid_loss": 0.5111, + "step": 5094, + "time": 13.42 + }, + { + "epoch": 4.89, + "learning_rate": "1.0014e-04", + "loss": 0.4498, + "slid_loss": 0.5107, + "step": 5095, + "time": 13.38 + }, + { + "epoch": 4.9, + "learning_rate": "1.0013e-04", + "loss": 0.5614, + "slid_loss": 0.5109, + "step": 5096, + "time": 11.33 + }, + { + "epoch": 4.9, + "learning_rate": "1.0013e-04", + "loss": 0.5445, + "slid_loss": 0.5124, + "step": 5097, + "time": 13.93 + }, + { + "epoch": 4.9, + "learning_rate": "1.0013e-04", + "loss": 0.4069, + "slid_loss": 0.5115, + "step": 5098, + "time": 14.06 + }, + { + "epoch": 4.9, + "learning_rate": "1.0013e-04", + "loss": 0.5411, + "slid_loss": 0.5117, + "step": 5099, + "time": 11.34 + }, + { + "epoch": 4.9, + "learning_rate": "1.0012e-04", + "loss": 0.4739, + "slid_loss": 0.5117, + "step": 5100, + "time": 11.55 + }, + { + "epoch": 4.9, + "learning_rate": "1.0012e-04", + "loss": 0.5741, + "slid_loss": 0.5122, + "step": 5101, + "time": 11.66 + }, + { + "epoch": 4.9, + "learning_rate": "1.0012e-04", + "loss": 0.502, + "slid_loss": 0.5126, + "step": 5102, + "time": 11.93 + }, + { + "epoch": 4.9, + "learning_rate": "1.0012e-04", + "loss": 0.5658, + "slid_loss": 0.5135, + "step": 5103, + "time": 13.21 + }, + { + "epoch": 4.9, + "learning_rate": "1.0011e-04", + "loss": 0.5497, + "slid_loss": 0.5136, + "step": 5104, + "time": 11.55 + }, + { + "epoch": 4.9, + 
"learning_rate": "1.0011e-04", + "loss": 0.4897, + "slid_loss": 0.5132, + "step": 5105, + "time": 11.56 + }, + { + "epoch": 4.9, + "learning_rate": "1.0011e-04", + "loss": 0.5316, + "slid_loss": 0.5138, + "step": 5106, + "time": 14.29 + }, + { + "epoch": 4.91, + "learning_rate": "1.0011e-04", + "loss": 0.5878, + "slid_loss": 0.5148, + "step": 5107, + "time": 11.22 + }, + { + "epoch": 4.91, + "learning_rate": "1.0011e-04", + "loss": 0.5206, + "slid_loss": 0.5146, + "step": 5108, + "time": 13.74 + }, + { + "epoch": 4.91, + "learning_rate": "1.0010e-04", + "loss": 0.4889, + "slid_loss": 0.5143, + "step": 5109, + "time": 13.45 + }, + { + "epoch": 4.91, + "learning_rate": "1.0010e-04", + "loss": 0.5671, + "slid_loss": 0.5141, + "step": 5110, + "time": 13.94 + }, + { + "epoch": 4.91, + "learning_rate": "1.0010e-04", + "loss": 0.5685, + "slid_loss": 0.515, + "step": 5111, + "time": 13.84 + }, + { + "epoch": 4.91, + "learning_rate": "1.0010e-04", + "loss": 0.5259, + "slid_loss": 0.514, + "step": 5112, + "time": 13.7 + }, + { + "epoch": 4.91, + "learning_rate": "1.0010e-04", + "loss": 0.5225, + "slid_loss": 0.5143, + "step": 5113, + "time": 13.7 + }, + { + "epoch": 4.91, + "learning_rate": "1.0009e-04", + "loss": 0.4598, + "slid_loss": 0.5134, + "step": 5114, + "time": 11.98 + }, + { + "epoch": 4.91, + "learning_rate": "1.0009e-04", + "loss": 0.5688, + "slid_loss": 0.5134, + "step": 5115, + "time": 12.86 + }, + { + "epoch": 4.91, + "learning_rate": "1.0009e-04", + "loss": 0.499, + "slid_loss": 0.5138, + "step": 5116, + "time": 10.7 + }, + { + "epoch": 4.92, + "learning_rate": "1.0009e-04", + "loss": 0.511, + "slid_loss": 0.513, + "step": 5117, + "time": 13.47 + }, + { + "epoch": 4.92, + "learning_rate": "1.0009e-04", + "loss": 0.4455, + "slid_loss": 0.5117, + "step": 5118, + "time": 13.65 + }, + { + "epoch": 4.92, + "learning_rate": "1.0008e-04", + "loss": 0.4618, + "slid_loss": 0.5109, + "step": 5119, + "time": 13.33 + }, + { + "epoch": 4.92, + "learning_rate": 
"1.0008e-04", + "loss": 0.5372, + "slid_loss": 0.5113, + "step": 5120, + "time": 11.99 + }, + { + "epoch": 4.92, + "learning_rate": "1.0008e-04", + "loss": 0.5449, + "slid_loss": 0.5118, + "step": 5121, + "time": 13.57 + }, + { + "epoch": 4.92, + "learning_rate": "1.0008e-04", + "loss": 0.6312, + "slid_loss": 0.5132, + "step": 5122, + "time": 11.61 + }, + { + "epoch": 4.92, + "learning_rate": "1.0008e-04", + "loss": 0.5028, + "slid_loss": 0.5135, + "step": 5123, + "time": 13.14 + }, + { + "epoch": 4.92, + "learning_rate": "1.0007e-04", + "loss": 0.5731, + "slid_loss": 0.5138, + "step": 5124, + "time": 13.92 + }, + { + "epoch": 4.92, + "learning_rate": "1.0007e-04", + "loss": 0.5178, + "slid_loss": 0.5141, + "step": 5125, + "time": 12.87 + }, + { + "epoch": 4.92, + "learning_rate": "1.0007e-04", + "loss": 0.4737, + "slid_loss": 0.5138, + "step": 5126, + "time": 13.75 + }, + { + "epoch": 4.93, + "learning_rate": "1.0007e-04", + "loss": 0.5634, + "slid_loss": 0.5143, + "step": 5127, + "time": 12.68 + }, + { + "epoch": 4.93, + "learning_rate": "1.0007e-04", + "loss": 0.5468, + "slid_loss": 0.5151, + "step": 5128, + "time": 12.88 + }, + { + "epoch": 4.93, + "learning_rate": "1.0006e-04", + "loss": 0.5456, + "slid_loss": 0.5152, + "step": 5129, + "time": 13.32 + }, + { + "epoch": 4.93, + "learning_rate": "1.0006e-04", + "loss": 0.4488, + "slid_loss": 0.5149, + "step": 5130, + "time": 11.68 + }, + { + "epoch": 4.93, + "learning_rate": "1.0006e-04", + "loss": 0.5207, + "slid_loss": 0.5153, + "step": 5131, + "time": 12.61 + }, + { + "epoch": 4.93, + "learning_rate": "1.0006e-04", + "loss": 0.4721, + "slid_loss": 0.5147, + "step": 5132, + "time": 13.26 + }, + { + "epoch": 4.93, + "learning_rate": "1.0006e-04", + "loss": 0.5193, + "slid_loss": 0.5148, + "step": 5133, + "time": 11.12 + }, + { + "epoch": 4.93, + "learning_rate": "1.0006e-04", + "loss": 0.4524, + "slid_loss": 0.5139, + "step": 5134, + "time": 13.6 + }, + { + "epoch": 4.93, + "learning_rate": "1.0006e-04", + 
"loss": 0.5952, + "slid_loss": 0.5151, + "step": 5135, + "time": 13.93 + }, + { + "epoch": 4.93, + "learning_rate": "1.0005e-04", + "loss": 0.5182, + "slid_loss": 0.5151, + "step": 5136, + "time": 13.63 + }, + { + "epoch": 4.93, + "learning_rate": "1.0005e-04", + "loss": 0.4437, + "slid_loss": 0.5153, + "step": 5137, + "time": 12.24 + }, + { + "epoch": 4.94, + "learning_rate": "1.0005e-04", + "loss": 0.5045, + "slid_loss": 0.5155, + "step": 5138, + "time": 13.32 + }, + { + "epoch": 4.94, + "learning_rate": "1.0005e-04", + "loss": 0.5059, + "slid_loss": 0.5154, + "step": 5139, + "time": 12.9 + }, + { + "epoch": 4.94, + "learning_rate": "1.0005e-04", + "loss": 0.4416, + "slid_loss": 0.5148, + "step": 5140, + "time": 14.01 + }, + { + "epoch": 4.94, + "learning_rate": "1.0005e-04", + "loss": 0.5812, + "slid_loss": 0.5159, + "step": 5141, + "time": 13.14 + }, + { + "epoch": 4.94, + "learning_rate": "1.0004e-04", + "loss": 0.52, + "slid_loss": 0.517, + "step": 5142, + "time": 12.38 + }, + { + "epoch": 4.94, + "learning_rate": "1.0004e-04", + "loss": 0.4483, + "slid_loss": 0.5171, + "step": 5143, + "time": 13.25 + }, + { + "epoch": 4.94, + "learning_rate": "1.0004e-04", + "loss": 0.4625, + "slid_loss": 0.517, + "step": 5144, + "time": 13.64 + }, + { + "epoch": 4.94, + "learning_rate": "1.0004e-04", + "loss": 0.4795, + "slid_loss": 0.5158, + "step": 5145, + "time": 13.73 + }, + { + "epoch": 4.94, + "learning_rate": "1.0004e-04", + "loss": 0.5096, + "slid_loss": 0.5162, + "step": 5146, + "time": 11.42 + }, + { + "epoch": 4.94, + "learning_rate": "1.0004e-04", + "loss": 0.4762, + "slid_loss": 0.5149, + "step": 5147, + "time": 12.74 + }, + { + "epoch": 4.95, + "learning_rate": "1.0004e-04", + "loss": 0.4907, + "slid_loss": 0.5146, + "step": 5148, + "time": 13.29 + }, + { + "epoch": 4.95, + "learning_rate": "1.0004e-04", + "loss": 0.494, + "slid_loss": 0.5147, + "step": 5149, + "time": 14.12 + }, + { + "epoch": 4.95, + "learning_rate": "1.0003e-04", + "loss": 0.4353, + 
"slid_loss": 0.5134, + "step": 5150, + "time": 12.91 + }, + { + "epoch": 4.95, + "learning_rate": "1.0003e-04", + "loss": 0.534, + "slid_loss": 0.5142, + "step": 5151, + "time": 13.44 + }, + { + "epoch": 4.95, + "learning_rate": "1.0003e-04", + "loss": 0.4997, + "slid_loss": 0.514, + "step": 5152, + "time": 12.17 + }, + { + "epoch": 4.95, + "learning_rate": "1.0003e-04", + "loss": 0.4979, + "slid_loss": 0.5137, + "step": 5153, + "time": 13.66 + }, + { + "epoch": 4.95, + "learning_rate": "1.0003e-04", + "loss": 0.5482, + "slid_loss": 0.5138, + "step": 5154, + "time": 13.75 + }, + { + "epoch": 4.95, + "learning_rate": "1.0003e-04", + "loss": 0.5279, + "slid_loss": 0.5138, + "step": 5155, + "time": 12.15 + }, + { + "epoch": 4.95, + "learning_rate": "1.0003e-04", + "loss": 0.4653, + "slid_loss": 0.5133, + "step": 5156, + "time": 12.8 + }, + { + "epoch": 4.95, + "learning_rate": "1.0003e-04", + "loss": 0.4983, + "slid_loss": 0.5133, + "step": 5157, + "time": 12.96 + }, + { + "epoch": 4.95, + "learning_rate": "1.0002e-04", + "loss": 0.3868, + "slid_loss": 0.5116, + "step": 5158, + "time": 12.0 + }, + { + "epoch": 4.96, + "learning_rate": "1.0002e-04", + "loss": 0.5163, + "slid_loss": 0.5109, + "step": 5159, + "time": 13.55 + }, + { + "epoch": 4.96, + "learning_rate": "1.0002e-04", + "loss": 0.4181, + "slid_loss": 0.5102, + "step": 5160, + "time": 11.98 + }, + { + "epoch": 4.96, + "learning_rate": "1.0002e-04", + "loss": 0.3912, + "slid_loss": 0.5084, + "step": 5161, + "time": 12.9 + }, + { + "epoch": 4.96, + "learning_rate": "1.0002e-04", + "loss": 0.4785, + "slid_loss": 0.5081, + "step": 5162, + "time": 13.73 + }, + { + "epoch": 4.96, + "learning_rate": "1.0002e-04", + "loss": 0.5481, + "slid_loss": 0.5085, + "step": 5163, + "time": 13.86 + }, + { + "epoch": 4.96, + "learning_rate": "1.0002e-04", + "loss": 0.4677, + "slid_loss": 0.5082, + "step": 5164, + "time": 13.44 + }, + { + "epoch": 4.96, + "learning_rate": "1.0002e-04", + "loss": 0.529, + "slid_loss": 0.5078, + 
"step": 5165, + "time": 13.12 + }, + { + "epoch": 4.96, + "learning_rate": "1.0002e-04", + "loss": 0.536, + "slid_loss": 0.5082, + "step": 5166, + "time": 13.66 + }, + { + "epoch": 4.96, + "learning_rate": "1.0002e-04", + "loss": 0.5129, + "slid_loss": 0.5081, + "step": 5167, + "time": 11.2 + }, + { + "epoch": 4.96, + "learning_rate": "1.0002e-04", + "loss": 0.531, + "slid_loss": 0.5084, + "step": 5168, + "time": 12.3 + }, + { + "epoch": 4.97, + "learning_rate": "1.0001e-04", + "loss": 0.4764, + "slid_loss": 0.507, + "step": 5169, + "time": 13.5 + }, + { + "epoch": 4.97, + "learning_rate": "1.0001e-04", + "loss": 0.5513, + "slid_loss": 0.5078, + "step": 5170, + "time": 11.95 + }, + { + "epoch": 4.97, + "learning_rate": "1.0001e-04", + "loss": 0.4493, + "slid_loss": 0.5076, + "step": 5171, + "time": 13.49 + }, + { + "epoch": 4.97, + "learning_rate": "1.0001e-04", + "loss": 0.5211, + "slid_loss": 0.5077, + "step": 5172, + "time": 13.61 + }, + { + "epoch": 4.97, + "learning_rate": "1.0001e-04", + "loss": 0.4601, + "slid_loss": 0.5067, + "step": 5173, + "time": 13.38 + }, + { + "epoch": 4.97, + "learning_rate": "1.0001e-04", + "loss": 0.5012, + "slid_loss": 0.5067, + "step": 5174, + "time": 14.01 + }, + { + "epoch": 4.97, + "learning_rate": "1.0001e-04", + "loss": 0.5893, + "slid_loss": 0.5076, + "step": 5175, + "time": 12.05 + }, + { + "epoch": 4.97, + "learning_rate": "1.0001e-04", + "loss": 0.5335, + "slid_loss": 0.5076, + "step": 5176, + "time": 13.6 + }, + { + "epoch": 4.97, + "learning_rate": "1.0001e-04", + "loss": 0.5616, + "slid_loss": 0.5079, + "step": 5177, + "time": 11.15 + }, + { + "epoch": 4.97, + "learning_rate": "1.0001e-04", + "loss": 0.4496, + "slid_loss": 0.5069, + "step": 5178, + "time": 11.32 + }, + { + "epoch": 4.98, + "learning_rate": "1.0001e-04", + "loss": 0.529, + "slid_loss": 0.5073, + "step": 5179, + "time": 12.34 + }, + { + "epoch": 4.98, + "learning_rate": "1.0001e-04", + "loss": 0.4713, + "slid_loss": 0.5077, + "step": 5180, + "time": 
12.58 + }, + { + "epoch": 4.98, + "learning_rate": "1.0001e-04", + "loss": 0.4871, + "slid_loss": 0.5072, + "step": 5181, + "time": 13.46 + }, + { + "epoch": 4.98, + "learning_rate": "1.0001e-04", + "loss": 0.502, + "slid_loss": 0.5072, + "step": 5182, + "time": 11.73 + }, + { + "epoch": 4.98, + "learning_rate": "1.0001e-04", + "loss": 0.4633, + "slid_loss": 0.5066, + "step": 5183, + "time": 11.44 + }, + { + "epoch": 4.98, + "learning_rate": "1.0000e-04", + "loss": 0.4947, + "slid_loss": 0.5061, + "step": 5184, + "time": 13.24 + }, + { + "epoch": 4.98, + "learning_rate": "1.0000e-04", + "loss": 0.4861, + "slid_loss": 0.5055, + "step": 5185, + "time": 14.09 + }, + { + "epoch": 4.98, + "learning_rate": "1.0000e-04", + "loss": 0.4987, + "slid_loss": 0.5059, + "step": 5186, + "time": 14.07 + }, + { + "epoch": 4.98, + "learning_rate": "1.0000e-04", + "loss": 0.5013, + "slid_loss": 0.5059, + "step": 5187, + "time": 13.59 + }, + { + "epoch": 4.98, + "learning_rate": "1.0000e-04", + "loss": 0.5147, + "slid_loss": 0.506, + "step": 5188, + "time": 14.3 + }, + { + "epoch": 4.98, + "learning_rate": "1.0000e-04", + "loss": 0.5712, + "slid_loss": 0.5066, + "step": 5189, + "time": 12.85 + }, + { + "epoch": 4.99, + "learning_rate": "1.0000e-04", + "loss": 0.4385, + "slid_loss": 0.5062, + "step": 5190, + "time": 13.0 + }, + { + "epoch": 4.99, + "learning_rate": "1.0000e-04", + "loss": 0.4915, + "slid_loss": 0.5067, + "step": 5191, + "time": 11.89 + }, + { + "epoch": 4.99, + "learning_rate": "1.0000e-04", + "loss": 0.4228, + "slid_loss": 0.5061, + "step": 5192, + "time": 11.42 + }, + { + "epoch": 4.99, + "learning_rate": "1.0000e-04", + "loss": 0.5071, + "slid_loss": 0.5056, + "step": 5193, + "time": 11.68 + }, + { + "epoch": 4.99, + "learning_rate": "1.0000e-04", + "loss": 0.5088, + "slid_loss": 0.5054, + "step": 5194, + "time": 13.17 + }, + { + "epoch": 4.99, + "learning_rate": "1.0000e-04", + "loss": 0.4227, + "slid_loss": 0.5052, + "step": 5195, + "time": 13.42 + }, + { + 
"epoch": 4.99, + "learning_rate": "1.0000e-04", + "loss": 0.5399, + "slid_loss": 0.5049, + "step": 5196, + "time": 14.41 + }, + { + "epoch": 4.99, + "learning_rate": "1.0000e-04", + "loss": 0.4837, + "slid_loss": 0.5043, + "step": 5197, + "time": 14.53 + }, + { + "epoch": 4.99, + "learning_rate": "1.0000e-04", + "loss": 0.4845, + "slid_loss": 0.5051, + "step": 5198, + "time": 13.23 + }, + { + "epoch": 4.99, + "learning_rate": "1.0000e-04", + "loss": 0.4553, + "slid_loss": 0.5042, + "step": 5199, + "time": 13.7 + }, + { + "epoch": 5.0, + "learning_rate": "1.0000e-04", + "loss": 0.5611, + "slid_loss": 0.5051, + "step": 5200, + "time": 13.83 + }, + { + "epoch": 5.0, + "learning_rate": "1.0000e-04", + "loss": 0.5229, + "slid_loss": 0.5046, + "step": 5201, + "time": 11.92 + }, + { + "epoch": 5.0, + "learning_rate": "1.0000e-04", + "loss": 0.4734, + "slid_loss": 0.5043, + "step": 5202, + "time": 13.74 + }, + { + "epoch": 5.0, + "learning_rate": "1.0000e-04", + "loss": 0.5101, + "slid_loss": 0.5038, + "step": 5203, + "time": 12.1 + }, + { + "epoch": 5.0, + "learning_rate": "1.0000e-04", + "loss": 0.5, + "slid_loss": 0.5033, + "step": 5204, + "time": 13.38 + }, + { + "epoch": 5.0, + "learning_rate": "1.0000e-04", + "loss": 0.5093, + "slid_loss": 0.5035, + "step": 5205, + "time": 12.95 + } + ], + "logging_steps": 1.0, + "max_steps": 5205, + "num_train_epochs": 5, + "save_steps": 50000.0, + "total_flos": 0.0, + "trial_name": null, + "trial_params": null +} diff --git a/training_args.bin b/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..ef7fc47d262b3915853100f2393e7e8b78eee115 --- /dev/null +++ b/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:16477a3093bec705d5c953a42339f69d1b8ddd7d862360831e7649065bce7c5d +size 6075 diff --git a/zero_to_fp32.py b/zero_to_fp32.py new file mode 100644 index 0000000000000000000000000000000000000000..c98caae31534368be22b67fc4ae906836c992a8d --- /dev/null +++ 
#!/usr/bin/env python

# Copyright (c) Microsoft Corporation.
# SPDX-License-Identifier: Apache-2.0

# DeepSpeed Team

# This script extracts fp32 consolidated weights from a zero 1, 2 and 3 DeepSpeed checkpoints. It gets
# copied into the top level checkpoint dir, so the user can easily do the conversion at any point in
# the future. Once extracted, the weights don't require DeepSpeed and can be used in any
# application.
#
# example: python zero_to_fp32.py . pytorch_model.bin

import argparse
import torch
import glob
import math
import os
import re
from collections import OrderedDict
from dataclasses import dataclass

# while this script doesn't use deepspeed to recover data, since the checkpoints are pickled with
# DeepSpeed data structures it has to be available in the current python environment.
from deepspeed.utils import logger
from deepspeed.checkpoint.constants import (DS_VERSION, OPTIMIZER_STATE_DICT, SINGLE_PARTITION_OF_FP32_GROUPS,
                                            FP32_FLAT_GROUPS, ZERO_STAGE, PARTITION_COUNT, PARAM_SHAPES, BUFFER_NAMES,
                                            FROZEN_PARAM_SHAPES, FROZEN_PARAM_FRAGMENTS)


@dataclass
class zero_model_state:
    """Fields extracted from one ``*_model_states.pt`` file by ``parse_model_states``."""
    # buffer name -> tensor, converted with ``.float()``
    buffers: dict
    # list of ``{param name: shape}`` dicts (iterated as a list of dicts by the merge functions)
    param_shapes: list
    # list of ``[key, value]`` pairs; ``state_dict[key]`` is aliased to ``state_dict[value]``
    shared_params: list
    # value stored under DS_VERSION, or None when the key is absent
    ds_version: int
    # frozen param name -> shape, or None when the checkpoint has no such entry
    frozen_param_shapes: dict
    # frozen param name -> tensor (fragment), or None when the checkpoint has no such entry
    frozen_param_fragments: dict


# module-level switch for verbose prints; set from ``--debug`` when run as a script
debug = 0

# load to cpu
device = torch.device('cpu')


def atoi(text):
    """Return ``int(text)`` when ``text`` consists only of digits, otherwise ``text`` unchanged."""
    return int(text) if text.isdigit() else text


def natural_keys(text):
    '''
    alist.sort(key=natural_keys) sorts in human order
    http://nedbatchelder.com/blog/200712/human_sorting.html
    (See Toothy's implementation in the comments)
    '''
    return [atoi(c) for c in re.split(r'(\d+)', text)]


def get_model_state_file(checkpoint_dir, zero_stage):
    """
    Return the path of the rank-0 model states file inside ``checkpoint_dir``.

    Args:
        - ``checkpoint_dir``: directory that holds the ``*_model_states.pt`` files
        - ``zero_stage``: ZeRO stage of the checkpoint; selects the expected file name

    Raises:
        - ``FileNotFoundError``: ``checkpoint_dir`` is not a directory or the expected file is missing
        - ``ValueError``: ``zero_stage`` is neither ``<= 2`` nor ``3``
    """
    if not os.path.isdir(checkpoint_dir):
        raise FileNotFoundError(f"Directory '{checkpoint_dir}' doesn't exist")

    # there should be only one file
    if zero_stage <= 2:
        file = os.path.join(checkpoint_dir, "mp_rank_00_model_states.pt")
    elif zero_stage == 3:
        file = os.path.join(checkpoint_dir, "zero_pp_rank_0_mp_rank_00_model_states.pt")
    else:
        # without this branch `file` would be unbound below and surface as an UnboundLocalError
        raise ValueError(f"unknown zero stage {zero_stage}")

    if not os.path.exists(file):
        raise FileNotFoundError(f"can't find model states file at '{file}'")

    return file


def get_checkpoint_files(checkpoint_dir, glob_pattern):
    """
    Return the files in ``checkpoint_dir`` matching ``glob_pattern``, sorted in natural (human) order.

    Raises:
        - ``FileNotFoundError``: no file matches the pattern
    """
    # XXX: need to test that this simple glob rule works for multi-node setup too
    ckpt_files = sorted(glob.glob(os.path.join(checkpoint_dir, glob_pattern)), key=natural_keys)

    if len(ckpt_files) == 0:
        raise FileNotFoundError(f"can't find {glob_pattern} files in directory '{checkpoint_dir}'")

    return ckpt_files


def get_optim_files(checkpoint_dir):
    """Return the naturally sorted ``*_optim_states.pt`` files found in ``checkpoint_dir``."""
    return get_checkpoint_files(checkpoint_dir, "*_optim_states.pt")


def get_model_state_files(checkpoint_dir):
    """Return the naturally sorted ``*_model_states.pt`` files found in ``checkpoint_dir``."""
    return get_checkpoint_files(checkpoint_dir, "*_model_states.pt")


def parse_model_states(files):
    """
    Load every model states file and return one ``zero_model_state`` per file, in the order given.

    Args:
        - ``files``: paths of ``*_model_states.pt`` files

    Raises:
        - ``ValueError``: a file has no BUFFER_NAMES entry, i.e. it is not a model state checkpoint
    """
    zero_model_states = []
    for file in files:
        # NOTE: torch.load unpickles the file - only run this on checkpoints you trust
        state_dict = torch.load(file, map_location=device)

        if BUFFER_NAMES not in state_dict:
            raise ValueError(f"{file} is not a model state checkpoint")
        buffer_names = state_dict[BUFFER_NAMES]
        if debug:
            print("Found buffers:", buffer_names)

        # recover just the buffers while restoring them to fp32 if they were saved in fp16
        buffers = {k: v.float() for k, v in state_dict["module"].items() if k in buffer_names}
        param_shapes = state_dict[PARAM_SHAPES]

        # frozen parameters are optional - older checkpoints may not carry them
        frozen_param_shapes = state_dict.get(FROZEN_PARAM_SHAPES, None)
        if frozen_param_shapes is not None and debug:
            print(f"Found frozen_param_shapes: {frozen_param_shapes}")

        # handle shared params
        shared_params = [[k, v] for k, v in state_dict["shared_params"].items()]

        ds_version = state_dict.get(DS_VERSION, None)

        frozen_param_fragments = state_dict.get(FROZEN_PARAM_FRAGMENTS, None)

        z_model_state = zero_model_state(buffers=buffers,
                                         param_shapes=param_shapes,
                                         shared_params=shared_params,
                                         ds_version=ds_version,
                                         frozen_param_shapes=frozen_param_shapes,
                                         frozen_param_fragments=frozen_param_fragments)
        zero_model_states.append(z_model_state)

    return zero_model_states


def parse_optim_states(files, ds_checkpoint_dir):
    """
    Load the optimizer state files and extract the fp32 flat groups of every rank.

    Args:
        - ``files``: paths of the ``*_optim_states.pt`` files, one per rank
        - ``ds_checkpoint_dir``: checkpoint folder, used only in the error message

    Returns:
        - ``(zero_stage, world_size, fp32_flat_groups)`` where ``fp32_flat_groups`` has one entry per
          file: for stage <= 2 the stored list of per-group tensors, for stage 3 a single tensor made by
          concatenating all groups

    Raises:
        - ``ValueError``: the first file is not a zero checkpoint, the number of files does not match
          the recorded partition count, or the zero stage is unknown
    """
    total_files = len(files)
    state_dicts = []
    for f in files:
        # NOTE: torch.load unpickles the file - only run this on checkpoints you trust
        state_dict = torch.load(f, map_location=device)
        # immediately discard the potentially huge 2 optimizer states as we only care for fp32 master weights
        # and also handle the case where it was already removed by another helper script
        state_dict["optimizer_state_dict"].pop("optimizer_state_dict", None)
        state_dicts.append(state_dict)

    if ZERO_STAGE not in state_dicts[0][OPTIMIZER_STATE_DICT]:
        raise ValueError(f"{files[0]} is not a zero checkpoint")
    zero_stage = state_dicts[0][OPTIMIZER_STATE_DICT][ZERO_STAGE]
    world_size = state_dicts[0][OPTIMIZER_STATE_DICT][PARTITION_COUNT]

    # For ZeRO-2 each param group can have different partition_count as data parallelism for expert
    # parameters can be different from data parallelism for non-expert parameters. So we can just
    # use the max of the partition_count to get the dp world_size.

    if isinstance(world_size, list):
        world_size = max(world_size)

    if world_size != total_files:
        raise ValueError(
            f"Expected {world_size} of '*_optim_states.pt' under '{ds_checkpoint_dir}' but found {total_files} files. "
            "Possibly due to an overwrite of an old checkpoint, or a checkpoint didn't get saved by one or more processes."
        )

    # the groups are named differently in each stage
    if zero_stage <= 2:
        fp32_flat_groups = [sd[OPTIMIZER_STATE_DICT][SINGLE_PARTITION_OF_FP32_GROUPS] for sd in state_dicts]
    elif zero_stage == 3:
        # if there is more than one param group, there will be multiple flattened tensors - one
        # flattened tensor per group - for simplicity merge them into a single tensor
        #
        # XXX: could make the script more memory efficient for when there are multiple groups - it
        # will require matching the sub-lists of param_shapes for each param group flattened tensor
        fp32_flat_groups = [torch.cat(sd[OPTIMIZER_STATE_DICT][FP32_FLAT_GROUPS], 0) for sd in state_dicts]
    else:
        raise ValueError(f"unknown zero stage {zero_stage}")

    return zero_stage, world_size, fp32_flat_groups


def _get_fp32_state_dict_from_zero_checkpoint(ds_checkpoint_dir):
    """
    Returns fp32 state_dict reconstructed from ds checkpoint

    Args:
        - ``ds_checkpoint_dir``: path to the deepspeed checkpoint folder (where the optimizer files are)

    """
    print(f"Processing zero checkpoint '{ds_checkpoint_dir}'")

    optim_files = get_optim_files(ds_checkpoint_dir)
    zero_stage, world_size, fp32_flat_groups = parse_optim_states(optim_files, ds_checkpoint_dir)
    print(f"Detected checkpoint of type zero stage {zero_stage}, world_size: {world_size}")

    model_files = get_model_state_files(ds_checkpoint_dir)

    zero_model_states = parse_model_states(model_files)
    print(f'Parsing checkpoint created by deepspeed=={zero_model_states[0].ds_version}')

    # any other stage value has already been rejected by parse_optim_states
    if zero_stage <= 2:
        return _get_fp32_state_dict_from_zero2_checkpoint(world_size, fp32_flat_groups, zero_model_states)
    elif zero_stage == 3:
        return _get_fp32_state_dict_from_zero3_checkpoint(world_size, fp32_flat_groups, zero_model_states)


def _zero2_merge_frozen_params(state_dict, zero_model_states):
    """
    Copy the frozen params recorded by rank 0 into ``state_dict`` (ZeRO stage <= 2).

    Does nothing when rank 0 has no frozen param shapes. Each fragment is stored as is under its
    param name.
    """
    if zero_model_states[0].frozen_param_shapes is None or len(zero_model_states[0].frozen_param_shapes) == 0:
        return

    frozen_param_shapes = zero_model_states[0].frozen_param_shapes
    frozen_param_fragments = zero_model_states[0].frozen_param_fragments

    if debug:
        num_elem = sum(s.numel() for s in frozen_param_shapes.values())
        print(f'rank 0: {FROZEN_PARAM_SHAPES}.numel = {num_elem}')

        wanted_params = len(frozen_param_shapes)
        wanted_numel = sum(s.numel() for s in frozen_param_shapes.values())
        avail_numel = sum(p.numel() for p in frozen_param_fragments.values())
        print(f'Frozen params: Have {avail_numel} numels to process.')
        print(f'Frozen params: Need {wanted_numel} numels in {wanted_params} params')

    total_params = 0
    total_numel = 0
    for name, shape in frozen_param_shapes.items():
        total_params += 1
        unpartitioned_numel = shape.numel()
        total_numel += unpartitioned_numel

        state_dict[name] = frozen_param_fragments[name]

        if debug:
            print(f"{name} full shape: {shape} unpartitioned numel {unpartitioned_numel} ")

    print(f"Reconstructed Frozen fp32 state dict with {total_params} params {total_numel} elements")


def _zero2_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states):
    """
    Rebuild every trainable param from the per-rank fp32 partitions (ZeRO stage <= 2).

    For each param group the partitions of all ranks are concatenated into one flat vector, which is
    then sliced param by param following the order and shapes in rank 0's ``param_shapes``. The results
    are views into that vector, stored in ``state_dict`` under the param names.

    Raises:
        - ``ValueError``: after alignment to ``2 * world_size`` the consumed and available element
          counts of a group differ
    """
    param_shapes = zero_model_states[0].param_shapes

    # Reconstruction protocol:
    #
    # XXX: document this

    if debug:
        for i in range(world_size):
            for j in range(len(fp32_flat_groups[0])):
                print(f"{FP32_FLAT_GROUPS}[{i}][{j}].shape={fp32_flat_groups[i][j].shape}")

    # XXX: memory usage doubles here (zero2)
    num_param_groups = len(fp32_flat_groups[0])
    merged_single_partition_of_fp32_groups = []
    for i in range(num_param_groups):
        merged_partitions = [sd[i] for sd in fp32_flat_groups]
        full_single_fp32_vector = torch.cat(merged_partitions, 0)
        merged_single_partition_of_fp32_groups.append(full_single_fp32_vector)
    avail_numel = sum(vector.numel() for vector in merged_single_partition_of_fp32_groups)

    if debug:
        wanted_params = sum(len(shapes) for shapes in param_shapes)
        wanted_numel = sum(sum(shape.numel() for shape in shapes.values()) for shapes in param_shapes)
        # not asserting if there is a mismatch due to possible padding
        print(f"Have {avail_numel} numels to process.")
        print(f"Need {wanted_numel} numels in {wanted_params} params.")

    # Z2 started to align to 2*world_size to improve nccl performance. Therefore both offset and
    # avail_numel can differ by anywhere between 0..2*world_size. Due to two unrelated complex
    # paddings performed in the code it's almost impossible to predict the exact numbers w/o the
    # live optimizer object, so we are checking that the numbers are within the right range
    align_to = 2 * world_size

    def zero2_align(x):
        return align_to * math.ceil(x / align_to)

    # params
    # XXX: for huge models that can't fit into the host's RAM we will have to recode this to support
    # out-of-core computing solution
    total_numel = 0
    total_params = 0
    for shapes, full_single_fp32_vector in zip(param_shapes, merged_single_partition_of_fp32_groups):
        offset = 0
        avail_numel = full_single_fp32_vector.numel()
        for name, shape in shapes.items():

            unpartitioned_numel = shape.numel()
            total_numel += unpartitioned_numel
            total_params += 1

            if debug:
                print(f"{name} full shape: {shape} unpartitioned numel {unpartitioned_numel} ")
            state_dict[name] = full_single_fp32_vector.narrow(0, offset, unpartitioned_numel).view(shape)
            offset += unpartitioned_numel

        if debug:
            print(f"original offset={offset}, avail_numel={avail_numel}")

        offset = zero2_align(offset)
        avail_numel = zero2_align(avail_numel)

        if debug:
            print(f"aligned offset={offset}, avail_numel={avail_numel}")

        # Sanity check
        if offset != avail_numel:
            raise ValueError(f"consumed {offset} numels out of {avail_numel} - something is wrong")

    print(f"Reconstructed fp32 state dict with {total_params} params {total_numel} elements")


def _get_fp32_state_dict_from_zero2_checkpoint(world_size, fp32_flat_groups, zero_model_states):
    """
    Assemble the full fp32 ``state_dict`` for a ZeRO stage <= 2 checkpoint.

    Entries are added in this order: rank 0 buffers, frozen params, trainable params, shared params.
    """
    state_dict = OrderedDict()

    # buffers
    buffers = zero_model_states[0].buffers
    state_dict.update(buffers)
    if debug:
        print(f"added {len(buffers)} buffers")

    _zero2_merge_frozen_params(state_dict, zero_model_states)

    _zero2_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states)

    # recover shared parameters
    for pair in zero_model_states[0].shared_params:
        if pair[1] in state_dict:
            state_dict[pair[0]] = state_dict[pair[1]]

    return state_dict


def zero3_partitioned_param_info(unpartitioned_numel, world_size):
    """
    Return ``(partitioned_numel, padding_numel)`` for a param split across ``world_size`` ranks.

    ``partitioned_numel`` is ``ceil(unpartitioned_numel / world_size)`` and ``padding_numel`` is the
    number of elements needed to round ``unpartitioned_numel`` up to a multiple of ``world_size``.
    """
    remainder = unpartitioned_numel % world_size
    padding_numel = (world_size - remainder) if remainder else 0
    partitioned_numel = math.ceil(unpartitioned_numel / world_size)
    return partitioned_numel, padding_numel


def _zero3_merge_frozen_params(state_dict, world_size, zero_model_states):
    """
    Rebuild the frozen params from the per-rank fragments (ZeRO stage 3).

    Does nothing when rank 0 has no frozen param shapes. For every param the fragments of all ranks
    are concatenated, trimmed to the unpartitioned element count and viewed in the recorded shape.
    """
    if zero_model_states[0].frozen_param_shapes is None or len(zero_model_states[0].frozen_param_shapes) == 0:
        return

    if debug:
        for i in range(world_size):
            num_elem = sum(s.numel() for s in zero_model_states[i].frozen_param_fragments.values())
            print(f'rank {i}: {FROZEN_PARAM_SHAPES}.numel = {num_elem}')

        frozen_param_shapes = zero_model_states[0].frozen_param_shapes
        wanted_params = len(frozen_param_shapes)
        wanted_numel = sum(s.numel() for s in frozen_param_shapes.values())
        avail_numel = sum(p.numel() for p in zero_model_states[0].frozen_param_fragments.values()) * world_size
        print(f'Frozen params: Have {avail_numel} numels to process.')
        print(f'Frozen params: Need {wanted_numel} numels in {wanted_params} params')

    total_params = 0
    total_numel = 0
    for name, shape in zero_model_states[0].frozen_param_shapes.items():
        total_params += 1
        unpartitioned_numel = shape.numel()
        total_numel += unpartitioned_numel

        param_frags = tuple(model_state.frozen_param_fragments[name] for model_state in zero_model_states)
        # the concatenation may carry trailing padding, so keep only the first unpartitioned_numel elements
        state_dict[name] = torch.cat(param_frags, 0).narrow(0, 0, unpartitioned_numel).view(shape)

        partitioned_numel, partitioned_padding_numel = zero3_partitioned_param_info(unpartitioned_numel, world_size)

        if debug:
            print(
                f"Frozen params: {total_params} {name} full shape: {shape} partition0 numel={partitioned_numel} partitioned_padding_numel={partitioned_padding_numel}"
            )

    print(f"Reconstructed Frozen fp32 state dict with {total_params} params {total_numel} elements")


def _zero3_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states):
    """
    Rebuild every trainable param from the per-rank flat fp32 tensors (ZeRO stage 3).

    Raises:
        - ``ValueError``: the number of consumed elements differs from the number available
    """
    param_shapes = zero_model_states[0].param_shapes
    # Reconstruction protocol: For zero3 we need to zip the partitions together at boundary of each
    # param, re-consolidating each param, while dealing with padding if any

    # merge list of dicts, preserving order
    param_shapes = {k: v for d in param_shapes for k, v in d.items()}

    if debug:
        for i in range(world_size):
            print(f"{FP32_FLAT_GROUPS}[{i}].shape={fp32_flat_groups[i].shape}")

        wanted_params = len(param_shapes)
        wanted_numel = sum(shape.numel() for shape in param_shapes.values())
        # not asserting if there is a mismatch due to possible padding
        avail_numel = fp32_flat_groups[0].numel() * world_size
        print(f"Trainable params: Have {avail_numel} numels to process.")
        print(f"Trainable params: Need {wanted_numel} numels in {wanted_params} params.")

    # elements available across all ranks, derived from rank 0's flat tensor
    avail_numel = fp32_flat_groups[0].numel() * world_size

    # params
    # XXX: for huge models that can't fit into the host's RAM we will have to recode this to support
    # out-of-core computing solution
    offset = 0
    total_numel = 0
    total_params = 0
    for name, shape in param_shapes.items():

        unpartitioned_numel = shape.numel()
        total_numel += unpartitioned_numel
        total_params += 1

        partitioned_numel, partitioned_padding_numel = zero3_partitioned_param_info(unpartitioned_numel, world_size)

        if debug:
            print(
                f"Trainable params: {total_params} {name} full shape: {shape} partition0 numel={partitioned_numel} partitioned_padding_numel={partitioned_padding_numel}"
            )

        # XXX: memory usage doubles here
        state_dict[name] = torch.cat(
            tuple(fp32_flat_groups[i].narrow(0, offset, partitioned_numel) for i in range(world_size)),
            0).narrow(0, 0, unpartitioned_numel).view(shape)
        offset += partitioned_numel

    # offset counted elements per rank; scale it to compare against the all-rank total
    offset *= world_size

    # Sanity check
    if offset != avail_numel:
        raise ValueError(f"consumed {offset} numels out of {avail_numel} - something is wrong")

    print(f"Reconstructed Trainable fp32 state dict with {total_params} params {total_numel} elements")


def _get_fp32_state_dict_from_zero3_checkpoint(world_size, fp32_flat_groups, zero_model_states):
    """
    Assemble the full fp32 ``state_dict`` for a ZeRO stage 3 checkpoint.

    Entries are added in this order: rank 0 buffers, frozen params, trainable params, shared params.
    """
    state_dict = OrderedDict()

    # buffers
    buffers = zero_model_states[0].buffers
    state_dict.update(buffers)
    if debug:
        print(f"added {len(buffers)} buffers")

    _zero3_merge_frozen_params(state_dict, world_size, zero_model_states)

    _zero3_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states)

    # recover shared parameters
    for pair in zero_model_states[0].shared_params:
        if pair[1] in state_dict:
            state_dict[pair[0]] = state_dict[pair[1]]

    return state_dict


def get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, tag=None):
    """
    Convert ZeRO 2 or 3 checkpoint into a single fp32 consolidated state_dict that can be loaded with
    ``load_state_dict()`` and used for training without DeepSpeed or shared with others, for example
    via a model hub.

    Args:
        - ``checkpoint_dir``: path to the desired checkpoint folder
        - ``tag``: checkpoint tag used as a unique identifier for checkpoint. If not provided will attempt to load tag in 'latest' file. e.g., ``global_step14``

    Returns:
        - pytorch ``state_dict``

    Raises:
        - ``ValueError``: ``tag`` is None and ``checkpoint_dir`` has no ``latest`` file
        - ``FileNotFoundError``: the tag folder doesn't exist inside ``checkpoint_dir``

    Note: this approach may not work if your application doesn't have sufficient free CPU memory and
    you may need to use the offline approach using the ``zero_to_fp32.py`` script that is saved with
    the checkpoint.

    A typical usage might be ::

        from deepspeed.utils.zero_to_fp32 import get_fp32_state_dict_from_zero_checkpoint
        # do the training and checkpoint saving
        state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir) # already on cpu
        model = model.cpu() # move to cpu
        model.load_state_dict(state_dict)
        # submit to model hub or save the model to share with others

    In this example the ``model`` will no longer be usable in the deepspeed context of the same
    application. i.e. you will need to re-initialize the deepspeed engine, since
    ``model.load_state_dict(state_dict)`` will remove all the deepspeed magic from it.

    If you want it all done for you, use ``load_state_dict_from_zero_checkpoint`` instead.

    """
    if tag is None:
        latest_path = os.path.join(checkpoint_dir, 'latest')
        if os.path.isfile(latest_path):
            with open(latest_path, 'r') as fd:
                tag = fd.read().strip()
        else:
            raise ValueError(f"Unable to find 'latest' file at {latest_path}")

    ds_checkpoint_dir = os.path.join(checkpoint_dir, tag)

    if not os.path.isdir(ds_checkpoint_dir):
        raise FileNotFoundError(f"Directory '{ds_checkpoint_dir}' doesn't exist")

    return _get_fp32_state_dict_from_zero_checkpoint(ds_checkpoint_dir)


def convert_zero_checkpoint_to_fp32_state_dict(checkpoint_dir, output_file, tag=None):
    """
    Convert ZeRO 2 or 3 checkpoint into a single fp32 consolidated ``state_dict`` file that can be
    loaded with ``torch.load(file)`` + ``load_state_dict()`` and used for training without DeepSpeed.

    Args:
        - ``checkpoint_dir``: path to the desired checkpoint folder. (one that contains the tag-folder, like ``global_step14``)
        - ``output_file``: path to the pytorch fp32 state_dict output file (e.g. path/pytorch_model.bin)
        - ``tag``: checkpoint tag used as a unique identifier for checkpoint. If not provided will attempt to load tag in the file named ``latest`` in the checkpoint folder, e.g., ``global_step14``
    """

    state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, tag)
    print(f"Saving fp32 state dict to {output_file}")
    torch.save(state_dict, output_file)


def load_state_dict_from_zero_checkpoint(model, checkpoint_dir, tag=None):
    """
    1. Put the provided model to cpu
    2. Convert ZeRO 2 or 3 checkpoint into a single fp32 consolidated ``state_dict``
    3. Load it into the provided model

    Args:
        - ``model``: the model object to update
        - ``checkpoint_dir``: path to the desired checkpoint folder. (one that contains the tag-folder, like ``global_step14``)
        - ``tag``: checkpoint tag used as a unique identifier for checkpoint. If not provided will attempt to load tag in the file named ``latest`` in the checkpoint folder, e.g., ``global_step14``

    Returns:
        - ``model``: modified model

    Make sure you have plenty of CPU memory available before you call this function. If you don't
    have enough use the ``zero_to_fp32.py`` utility to do the conversion. You will find it
    conveniently placed for you in the checkpoint folder.

    A typical usage might be ::

        from deepspeed.utils.zero_to_fp32 import load_state_dict_from_zero_checkpoint
        model = load_state_dict_from_zero_checkpoint(trainer.model, checkpoint_dir)
        # submit to model hub or save the model to share with others

    Note, that once this was run, the ``model`` will no longer be usable in the deepspeed context
    of the same application. i.e. you will need to re-initialize the deepspeed engine, since
    ``model.load_state_dict(state_dict)`` will remove all the deepspeed magic from it.

    """
    logger.info("Extracting fp32 weights")
    state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, tag)

    logger.info("Overwriting model with fp32 weights")
    model = model.cpu()
    # strict=False: keys missing from or unexpected in the reconstructed state_dict are not an error
    model.load_state_dict(state_dict, strict=False)

    return model


if __name__ == "__main__":

    parser = argparse.ArgumentParser()
    parser.add_argument("checkpoint_dir",
                        type=str,
                        help="path to the desired checkpoint folder, e.g., path/checkpoint-12")
    parser.add_argument(
        "output_file",
        type=str,
        help="path to the pytorch fp32 state_dict output file (e.g. path/checkpoint-12/pytorch_model.bin)")
    parser.add_argument("-t",
                        "--tag",
                        type=str,
                        default=None,
                        help="checkpoint tag used as a unique identifier for checkpoint. e.g., global_step1")
    parser.add_argument("-d", "--debug", action='store_true', help="enable debug")
    args = parser.parse_args()

    # rebinding the module-level flag turns on the verbose prints in the functions above
    debug = args.debug

    convert_zero_checkpoint_to_fp32_state_dict(args.checkpoint_dir, args.output_file, tag=args.tag)