diff --git a/README.md b/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..2e9b14f43b35766402c6c1edc8c01859ee3b0050
--- /dev/null
+++ b/README.md
@@ -0,0 +1,202 @@
+---
+base_model: Willow123/LVP_R560_IHD24_S3_1024_N24_CAT
+library_name: peft
+---
+
+# Model Card for Model ID
+
+
+
+
+
+## Model Details
+
+### Model Description
+
+
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+
+
+### Direct Use
+
+
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+
+
+[More Information Needed]
+
+### Recommendations
+
+
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+
+
+[More Information Needed]
+
+### Training Procedure
+
+
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed]
+
+#### Speeds, Sizes, Times [optional]
+
+
+
+[More Information Needed]
+
+## Evaluation
+
+
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+
+
+[More Information Needed]
+
+#### Factors
+
+
+
+[More Information Needed]
+
+#### Metrics
+
+
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+
+
+[More Information Needed]
+
+## Environmental Impact
+
+
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+### Framework versions
+
+- PEFT 0.8.2
diff --git a/adapter_config.json b/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..e91f195aa27438a0fc7278f298e0d8dbcaef2eda
--- /dev/null
+++ b/adapter_config.json
@@ -0,0 +1,32 @@
+{
+ "alpha_pattern": {},
+ "auto_mapping": null,
+ "base_model_name_or_path": "Willow123/LVP_R560_IHD24_S3_1024_N24_CAT",
+ "bias": "none",
+ "fan_in_fan_out": false,
+ "inference_mode": true,
+ "init_lora_weights": true,
+ "layers_pattern": null,
+ "layers_to_transform": null,
+ "loftq_config": {},
+ "lora_alpha": 128,
+ "lora_dropout": 0.05,
+ "megatron_config": null,
+ "megatron_core": "megatron.core",
+ "modules_to_save": [
+ "video_mem_proj"
+ ],
+ "peft_type": "LORA",
+ "r": 128,
+ "rank_pattern": {},
+ "revision": null,
+ "target_modules": [
+ "feed_forward.w2",
+ "attention.wo",
+ "feed_forward.w3",
+ "feed_forward.w1",
+ "attention.wqkv"
+ ],
+ "task_type": "CAUSAL_LM",
+ "use_rslora": false
+}
diff --git a/adapter_model.bin b/adapter_model.bin
new file mode 100644
index 0000000000000000000000000000000000000000..64d3599083dc5464b098d20b4daa26cf3ee66767
--- /dev/null
+++ b/adapter_model.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bc9d0e035f664fbf923ef2c5f1b792d06e9d354aaac940764d6344f206275985
+size 650245533
diff --git a/added_tokens.json b/added_tokens.json
new file mode 100644
index 0000000000000000000000000000000000000000..39090265148ac44e5d5ce46e69594ac3ad34b1e5
--- /dev/null
+++ b/added_tokens.json
@@ -0,0 +1,8 @@
+{
+ "<|action_end|>": 92547,
+ "<|action_start|>": 92546,
+ "<|im_end|>": 92545,
+ "<|im_start|>": 92544,
+ "<|interpreter|>": 92548,
+ "<|plugin|>": 92549
+}
diff --git a/latest b/latest
new file mode 100644
index 0000000000000000000000000000000000000000..75ae820a9e47ad9c01975fb0f9a1fead180a2721
--- /dev/null
+++ b/latest
@@ -0,0 +1 @@
+global_step5205
\ No newline at end of file
diff --git a/rng_state_0.pth b/rng_state_0.pth
new file mode 100644
index 0000000000000000000000000000000000000000..e378fd2d0ab32bd4f07d29f243a6e509b0792d77
--- /dev/null
+++ b/rng_state_0.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2f32f3deebf95eae840de33c723b45d006a96b29bc35e9b934212bd156d2b588
+size 21687
diff --git a/rng_state_1.pth b/rng_state_1.pth
new file mode 100644
index 0000000000000000000000000000000000000000..78d8e34720df7d704d8048d2ccc6ad123fa8cb71
--- /dev/null
+++ b/rng_state_1.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:096d3a423466f936fac9485eb4afd048be9c6e9dd001e7aefee72ad97960be60
+size 21687
diff --git a/rng_state_10.pth b/rng_state_10.pth
new file mode 100644
index 0000000000000000000000000000000000000000..54b446f1c17e13df672e935ffd744973d8f59ad7
--- /dev/null
+++ b/rng_state_10.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f8f578cef3c3be5fc4a46d84373ef3cbd8841afe911cfa12a6c88b1c2ddbd1c9
+size 21698
diff --git a/rng_state_11.pth b/rng_state_11.pth
new file mode 100644
index 0000000000000000000000000000000000000000..a6912eb48d8c1276d01544bf0db24814a5873553
--- /dev/null
+++ b/rng_state_11.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:aefb94f2f768a405d771252d008f196da11b3511a95a99677a7f2c37d6fbc062
+size 21698
diff --git a/rng_state_12.pth b/rng_state_12.pth
new file mode 100644
index 0000000000000000000000000000000000000000..15ebdedde014c9dbc036b7bec529f8fc77b53cab
--- /dev/null
+++ b/rng_state_12.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:84b855348be7d42d8c194c7e94c51c6c35845ae1b9cbb471a4cf4f4a0bf95f44
+size 21698
diff --git a/rng_state_13.pth b/rng_state_13.pth
new file mode 100644
index 0000000000000000000000000000000000000000..4d4514f965e81d5c543f0ca72394203fcb1c194e
--- /dev/null
+++ b/rng_state_13.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b45bec30d67f590021d868dcac4d053d411ba351b02feeb868d60633ba8b0a3a
+size 21698
diff --git a/rng_state_14.pth b/rng_state_14.pth
new file mode 100644
index 0000000000000000000000000000000000000000..2ad79a4462699c247b66d78c756b6da62b686544
--- /dev/null
+++ b/rng_state_14.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ffda5563f489cd115a603bb92d58313efdbed991d5d4ae30937daf2637311929
+size 21698
diff --git a/rng_state_15.pth b/rng_state_15.pth
new file mode 100644
index 0000000000000000000000000000000000000000..40767c8f8fee7320f98ca422f60a84ec8b13fd65
--- /dev/null
+++ b/rng_state_15.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e4a3b3e76d3dfcf2e0a4b85364ac34e592f64341f51f2b5732f98d8258503683
+size 21698
diff --git a/rng_state_16.pth b/rng_state_16.pth
new file mode 100644
index 0000000000000000000000000000000000000000..5dcada17d6b8169a8cc29ab588893d65e96f9333
--- /dev/null
+++ b/rng_state_16.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b78460734829b0a4e9c14f9140bf6955a6b983f4d07a4ab1a185ac70eb4ee5d7
+size 21698
diff --git a/rng_state_17.pth b/rng_state_17.pth
new file mode 100644
index 0000000000000000000000000000000000000000..aed3c79d15626235fb172ccaa030ee1da54b68ac
--- /dev/null
+++ b/rng_state_17.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d71b49deebaf8677b488c61536aafbed5ab4da647cfd264c098742a0679c211f
+size 21698
diff --git a/rng_state_18.pth b/rng_state_18.pth
new file mode 100644
index 0000000000000000000000000000000000000000..45af1a8b7ecbe12121b39f1b303d3b73d47fbdb1
--- /dev/null
+++ b/rng_state_18.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:90b4a2bbf369561611d9c79fca2cdae1df654bca465a3ed8778c4718fe936be0
+size 21698
diff --git a/rng_state_19.pth b/rng_state_19.pth
new file mode 100644
index 0000000000000000000000000000000000000000..2228579cc792afacae97e226dcd37d2d0b4db8fd
--- /dev/null
+++ b/rng_state_19.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f261d09d3cbd26d0c734dd38a47b1f0834d18743a1472800f9d3e7155a494698
+size 21698
diff --git a/rng_state_2.pth b/rng_state_2.pth
new file mode 100644
index 0000000000000000000000000000000000000000..f08ec73f4d4384f93e4879b68776ad49368741fe
--- /dev/null
+++ b/rng_state_2.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d0809f61282e55bc316e0f0b6de6b9610219668913abd22600a57b663b2a9fb1
+size 21687
diff --git a/rng_state_20.pth b/rng_state_20.pth
new file mode 100644
index 0000000000000000000000000000000000000000..f3eed69fc5eb6dd6693478566ea89c79a693265f
--- /dev/null
+++ b/rng_state_20.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ee0d3a61cc607fbd8a5d3aeee2abd2233e8638c6fb0c19926946341f834e5b57
+size 21698
diff --git a/rng_state_21.pth b/rng_state_21.pth
new file mode 100644
index 0000000000000000000000000000000000000000..e0509cee1a131a065aac0a6e3cca954c777d3858
--- /dev/null
+++ b/rng_state_21.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6fc106ee653e4f285b14d0c908b971af3bf6cf4d2e4a9c38fc046f0a9a1dbf60
+size 21698
diff --git a/rng_state_22.pth b/rng_state_22.pth
new file mode 100644
index 0000000000000000000000000000000000000000..c78533d360af010fa0abf0c10281b22c52b48251
--- /dev/null
+++ b/rng_state_22.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:75f7f1a576417e93bd438710fbe49b9b6c29acd1dd83f68fa69041e406e33295
+size 21698
diff --git a/rng_state_23.pth b/rng_state_23.pth
new file mode 100644
index 0000000000000000000000000000000000000000..a95b1522cfee5faf513d8c52690ccd90f40761d8
--- /dev/null
+++ b/rng_state_23.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d5bede2c9a0dedc8dbae33c7408f5902bf6ca26af91f3c37badc01393765c20b
+size 21698
diff --git a/rng_state_24.pth b/rng_state_24.pth
new file mode 100644
index 0000000000000000000000000000000000000000..212e001ac5d3272da2e8252de5d7adeee0d60ddc
--- /dev/null
+++ b/rng_state_24.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7ed7167b9b5af50cc9151f7e68e780fc4b61f2f73eeb3c0bb8b675c48250f42e
+size 21698
diff --git a/rng_state_25.pth b/rng_state_25.pth
new file mode 100644
index 0000000000000000000000000000000000000000..c606cafefcf457951487182e7db310541b009068
--- /dev/null
+++ b/rng_state_25.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8a8bcefe50fa56e84e00332f42333de7ac29ca9c85053b70152541f132280629
+size 21698
diff --git a/rng_state_26.pth b/rng_state_26.pth
new file mode 100644
index 0000000000000000000000000000000000000000..47fcfafac5054dd9e7297429441dfc42cf536b16
--- /dev/null
+++ b/rng_state_26.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b9878267b7d028fec9517d4933e946095a87a0c2926fc1e71036ff4971f87c30
+size 21698
diff --git a/rng_state_27.pth b/rng_state_27.pth
new file mode 100644
index 0000000000000000000000000000000000000000..e568bfd4da0fa0be99df97d5f4d41e06d5da0f4f
--- /dev/null
+++ b/rng_state_27.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fab74b9342d718bc575d40e99258a2b3bb93e47546d1e7d20584a0c5a5c01e98
+size 21698
diff --git a/rng_state_28.pth b/rng_state_28.pth
new file mode 100644
index 0000000000000000000000000000000000000000..b11e8f7553642203dfe197d90cbb5f81292ad92c
--- /dev/null
+++ b/rng_state_28.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2f9265f6bca1c7985ce8f4b6fed5e61d285044ee960d5e0fa14c794ba6753a21
+size 21698
diff --git a/rng_state_29.pth b/rng_state_29.pth
new file mode 100644
index 0000000000000000000000000000000000000000..32538dcc9982aca847a4de773af9015aa172064f
--- /dev/null
+++ b/rng_state_29.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f046cd5d4e2573eeb1ab2d78a539f5de5db3b665c459b19adba8a4db314dc992
+size 21698
diff --git a/rng_state_3.pth b/rng_state_3.pth
new file mode 100644
index 0000000000000000000000000000000000000000..e4d71ac68f4655708cb43a6324269f5c4a10e57f
--- /dev/null
+++ b/rng_state_3.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8a268e995c7a8abe9c05675b6dad6ef3f7b675c7490d238c343d182fec337c3a
+size 21687
diff --git a/rng_state_30.pth b/rng_state_30.pth
new file mode 100644
index 0000000000000000000000000000000000000000..f39510c4bfedd3159cde571b855e02f76ada5976
--- /dev/null
+++ b/rng_state_30.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d0fa9ee6eede2f7c682f2de10cdaf83be9593eb8d101c8246d78db6d0a73c718
+size 21698
diff --git a/rng_state_31.pth b/rng_state_31.pth
new file mode 100644
index 0000000000000000000000000000000000000000..2d968f594cf6b6201c96645a7406550fb1c7eaff
--- /dev/null
+++ b/rng_state_31.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:903c3e981821ca9263d60633e520d37fcdcf2ab3a4d32a45355bd4ea5989849b
+size 21698
diff --git a/rng_state_32.pth b/rng_state_32.pth
new file mode 100644
index 0000000000000000000000000000000000000000..89bf16999aa9a49d7a33af00092611350dec118e
--- /dev/null
+++ b/rng_state_32.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d211c885b66eb9bd4605948e4bdf25e8a81567f91142e33728437ce83e1b566e
+size 21698
diff --git a/rng_state_33.pth b/rng_state_33.pth
new file mode 100644
index 0000000000000000000000000000000000000000..f56fe888166ead8c83238354e3b7f5d9039e462e
--- /dev/null
+++ b/rng_state_33.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:23eeb55dcacdeb4724b3ad17396c314c0744f7de9053fe96f59e57f295b4fe8e
+size 21698
diff --git a/rng_state_34.pth b/rng_state_34.pth
new file mode 100644
index 0000000000000000000000000000000000000000..be92bf71a41659b4d0cb9f05e18620de4445ddd2
--- /dev/null
+++ b/rng_state_34.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:49dd174caa80771bb0ff737eb515840180862605a2d0bc367e1cf38cbad71d59
+size 21698
diff --git a/rng_state_35.pth b/rng_state_35.pth
new file mode 100644
index 0000000000000000000000000000000000000000..b482b0ecfd5eb25992e12d381f54ca18d7b10e12
--- /dev/null
+++ b/rng_state_35.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c2e9715a7cb5a3f3cb52b3df1568314848fdd8cbcad741f2152aa183a6e2e9dc
+size 21698
diff --git a/rng_state_36.pth b/rng_state_36.pth
new file mode 100644
index 0000000000000000000000000000000000000000..7b7e423bded1c2a23aec9b2a1b7aed60cf771e3e
--- /dev/null
+++ b/rng_state_36.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3a5ed4ee19cb2614099a5ba801c5ed72b14575cf1f2e25f9ffbe43a1386f7858
+size 21698
diff --git a/rng_state_37.pth b/rng_state_37.pth
new file mode 100644
index 0000000000000000000000000000000000000000..4ac03137fcfeecf4e81c7de31eaba59f4abce685
--- /dev/null
+++ b/rng_state_37.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:651b4aac19c2d626983bba18237f747f728000646133e6b0b049f243cb3132ea
+size 21698
diff --git a/rng_state_38.pth b/rng_state_38.pth
new file mode 100644
index 0000000000000000000000000000000000000000..b35869b1d78d4b61b03afa3e0df3c46123e9ed88
--- /dev/null
+++ b/rng_state_38.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:741c12b7082a5202243578c85ca643308320066210cd9236c8095b61fc80f37e
+size 21698
diff --git a/rng_state_39.pth b/rng_state_39.pth
new file mode 100644
index 0000000000000000000000000000000000000000..a3d1832a876a9ca249266f7cdadb5a8353c186b2
--- /dev/null
+++ b/rng_state_39.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:973a9bd5d699c4e0b70fdf3fe694e61bf5485e0485469b1b9689f648f7e55ec5
+size 21698
diff --git a/rng_state_4.pth b/rng_state_4.pth
new file mode 100644
index 0000000000000000000000000000000000000000..fd9f469b6044224d2531d71eabed48806aa765d2
--- /dev/null
+++ b/rng_state_4.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f70ea0519f22c148c6af06722c576a6b2a4d63ad7adb9ca5367629a4706d6e5b
+size 21687
diff --git a/rng_state_40.pth b/rng_state_40.pth
new file mode 100644
index 0000000000000000000000000000000000000000..4984d1054fd90f205eeb7a19575a0e6d49e5bb3b
--- /dev/null
+++ b/rng_state_40.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:793a48ef299ca8b93a74f269a7bdc1d03857061baa7f3a63076ee937963b314d
+size 21698
diff --git a/rng_state_41.pth b/rng_state_41.pth
new file mode 100644
index 0000000000000000000000000000000000000000..6c9e5b6dcfa080f43700b83323ce3da155d700b1
--- /dev/null
+++ b/rng_state_41.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9cac3e5a480e7456e6b15be70c10c1e72762052ac4a4230fc6b4879dea35044e
+size 21698
diff --git a/rng_state_42.pth b/rng_state_42.pth
new file mode 100644
index 0000000000000000000000000000000000000000..1ec5cc50ac3d7f02c551e9e64d789830bd311e89
--- /dev/null
+++ b/rng_state_42.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:55f6179d1d35d75865cfae7e2f728992dd5e1bf7c8744f33532bf51885d4fb5e
+size 21698
diff --git a/rng_state_43.pth b/rng_state_43.pth
new file mode 100644
index 0000000000000000000000000000000000000000..a76d7925a715279b8059a983466152cf678deac6
--- /dev/null
+++ b/rng_state_43.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f055581ee89ef7a90f56f33f47239ea59aa63446af01f45b292a22b314b4fc55
+size 21698
diff --git a/rng_state_44.pth b/rng_state_44.pth
new file mode 100644
index 0000000000000000000000000000000000000000..d2781715dd6e2a56fbaa1e8c23d318ab39b9284f
--- /dev/null
+++ b/rng_state_44.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:145ba85aeb47b401a0a9350c847991e69fcd1a5d126e590e4b35ce539e89296e
+size 21698
diff --git a/rng_state_45.pth b/rng_state_45.pth
new file mode 100644
index 0000000000000000000000000000000000000000..a1fbeb8ed1898563e57f7edfa610242f684cf26b
--- /dev/null
+++ b/rng_state_45.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:998194852a2d6f4c16337accc920327a300253b56dccead0ca0317d3fef7ff62
+size 21698
diff --git a/rng_state_46.pth b/rng_state_46.pth
new file mode 100644
index 0000000000000000000000000000000000000000..a0b01c7562c937bdb56b4379bac1ca37bcada33e
--- /dev/null
+++ b/rng_state_46.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0a26ffd0852413a7ab601ec75ae7e95cbb44b28bdde1dfcdce660908a29977f2
+size 21698
diff --git a/rng_state_47.pth b/rng_state_47.pth
new file mode 100644
index 0000000000000000000000000000000000000000..e4d896af65a7687eb1a06b7887b82addfd1f6005
--- /dev/null
+++ b/rng_state_47.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4fd4289f273fb9119a44d8b71406282927c1c91f514c930be1c0dd700ccc3244
+size 21698
diff --git a/rng_state_48.pth b/rng_state_48.pth
new file mode 100644
index 0000000000000000000000000000000000000000..a0675330b9ee4942c1d510240817300b17af3445
--- /dev/null
+++ b/rng_state_48.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:508969502676847268b1d897ca0143061ddbfb191ab89db355d15f1abf9a1648
+size 21698
diff --git a/rng_state_49.pth b/rng_state_49.pth
new file mode 100644
index 0000000000000000000000000000000000000000..641409e9b438cb2b33972cd7dadbd71f2b6a59d1
--- /dev/null
+++ b/rng_state_49.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9789afc9eb30d45dc49a69bd432888ac6836ef5668db68c014d6ba71c890d262
+size 21698
diff --git a/rng_state_5.pth b/rng_state_5.pth
new file mode 100644
index 0000000000000000000000000000000000000000..5b87cde630dd8c3b9020219871b2174f778754fa
--- /dev/null
+++ b/rng_state_5.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ae151f2bd3e2589f8d312e51e6e8d556dc9296a0d7f26fafd2260bd1c97fb0f4
+size 21687
diff --git a/rng_state_50.pth b/rng_state_50.pth
new file mode 100644
index 0000000000000000000000000000000000000000..ac9ec962c911319c4a4b42039ddaba64f6588d0d
--- /dev/null
+++ b/rng_state_50.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:20500264cbd8c7e000d466fbeea63318f966b96c49a6b5d7091998e092a1a188
+size 21698
diff --git a/rng_state_51.pth b/rng_state_51.pth
new file mode 100644
index 0000000000000000000000000000000000000000..c302eefbe0ad4d2cf94dc7ab347a453f00896edf
--- /dev/null
+++ b/rng_state_51.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:025f7058c3ff8d344a7dd701a5c7d68a8d01e203610fc1aa924543e3459c7bab
+size 21698
diff --git a/rng_state_52.pth b/rng_state_52.pth
new file mode 100644
index 0000000000000000000000000000000000000000..7d7e9057b7b6bff329938308dc17524f9acfc3a9
--- /dev/null
+++ b/rng_state_52.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ab3b5f0e732a8a2626324b4e46038a572225644484e4d08efcefcb3e4e7c087d
+size 21698
diff --git a/rng_state_53.pth b/rng_state_53.pth
new file mode 100644
index 0000000000000000000000000000000000000000..d3f397d9d38e32c752cc902e430e2c4c209237a4
--- /dev/null
+++ b/rng_state_53.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9f83f361db9415c9f8267e77e6cc59eb37d6df293506c66718bc6639732256db
+size 21698
diff --git a/rng_state_54.pth b/rng_state_54.pth
new file mode 100644
index 0000000000000000000000000000000000000000..196bf1b4635ffc48a0cbaf2088702b5cd01c5884
--- /dev/null
+++ b/rng_state_54.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f098966c11444eb5b876b7084288c7ee7d7c406347ccf8db3034e7dc020e3e79
+size 21698
diff --git a/rng_state_55.pth b/rng_state_55.pth
new file mode 100644
index 0000000000000000000000000000000000000000..2ce0540c7713b07a34a63d47d63916cf1f1b60a5
--- /dev/null
+++ b/rng_state_55.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d2eceb4f6c460ddbd79594ed9c7832a0d98eb1086d361f4c507cc12cfc6e6217
+size 21698
diff --git a/rng_state_56.pth b/rng_state_56.pth
new file mode 100644
index 0000000000000000000000000000000000000000..59375909df70c2aaba5459ca78299561d323b85c
--- /dev/null
+++ b/rng_state_56.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:23c96246a0c4ec84499aa01ae6fa22fe102f8a8205731c6b41cd280ce64beebc
+size 21698
diff --git a/rng_state_57.pth b/rng_state_57.pth
new file mode 100644
index 0000000000000000000000000000000000000000..0736f36c6a4f835a1a41818b7c148767d30ef404
--- /dev/null
+++ b/rng_state_57.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5206d653cbd5d2c0978c032751ebc55091d57411bccda8bae4663c7100f0b764
+size 21698
diff --git a/rng_state_58.pth b/rng_state_58.pth
new file mode 100644
index 0000000000000000000000000000000000000000..0d3612d731e97c6761e3527f5b056722fd27bfc0
--- /dev/null
+++ b/rng_state_58.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:80e7f95d0a7e48a8d031627ef14af9fe31f8e3329a86a4685c5311e15e7e3802
+size 21698
diff --git a/rng_state_59.pth b/rng_state_59.pth
new file mode 100644
index 0000000000000000000000000000000000000000..cd55432a02996c3d5ab4e5dfbd688e53ef5c07c2
--- /dev/null
+++ b/rng_state_59.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:067b8bc48403d9acb50e621317f598f511630da2c26c27ac041bcf5b591f9ffe
+size 21698
diff --git a/rng_state_6.pth b/rng_state_6.pth
new file mode 100644
index 0000000000000000000000000000000000000000..22bd1dc6060a5a7ac736ad09cc1e4df16d887c06
--- /dev/null
+++ b/rng_state_6.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a89e153800f32e9d9adeccfada4c4f3744e39e963d1db7a5c65cc48339c43178
+size 21687
diff --git a/rng_state_60.pth b/rng_state_60.pth
new file mode 100644
index 0000000000000000000000000000000000000000..47a9f0002dfbfd02d4505e20a69a5c692ae106a2
--- /dev/null
+++ b/rng_state_60.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cf7d77313b8adcc5ccf16ea2833cb077a457659633c6711cadfe6805cdb79a14
+size 21698
diff --git a/rng_state_61.pth b/rng_state_61.pth
new file mode 100644
index 0000000000000000000000000000000000000000..fb17a5640d92a9b924c3110615a5569ef411063e
--- /dev/null
+++ b/rng_state_61.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d3d9d6b78a847ebe3128bd9500abd0d1f5ff960c6ec756d1d2eb8eb28bad86b4
+size 21698
diff --git a/rng_state_62.pth b/rng_state_62.pth
new file mode 100644
index 0000000000000000000000000000000000000000..086502cbdc62e4792cc4c3f0ed2c1eb251ecbf6a
--- /dev/null
+++ b/rng_state_62.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1f1d1b569762fea936830a772812fe5b37c9c58ef631874036e538a4c54fcd0e
+size 21698
diff --git a/rng_state_63.pth b/rng_state_63.pth
new file mode 100644
index 0000000000000000000000000000000000000000..ff90a438e11877801054dd9da54a78210e02b264
--- /dev/null
+++ b/rng_state_63.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b55a78aa8bfe3e80d9717fad32fed6924e2c1aa0971f3fefe2816736f0b84923
+size 21698
diff --git a/rng_state_7.pth b/rng_state_7.pth
new file mode 100644
index 0000000000000000000000000000000000000000..ac9b2a0a7e31779db9533edf4ae345bdb576b767
--- /dev/null
+++ b/rng_state_7.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c5317018b52d3086e703ddfe73befe6535978fc55374b88003ed8d9ee74d8627
+size 21687
diff --git a/rng_state_8.pth b/rng_state_8.pth
new file mode 100644
index 0000000000000000000000000000000000000000..95369cdd9b00f7c08d1a62be65b74e5319680ddf
--- /dev/null
+++ b/rng_state_8.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4b81d0252f0dbb99a1948b4b88b282e9b34b2f6f94d98d818709e19a535d697f
+size 21687
diff --git a/rng_state_9.pth b/rng_state_9.pth
new file mode 100644
index 0000000000000000000000000000000000000000..264b50be663d9d5ff2a5276386fdcabda1a1dd85
--- /dev/null
+++ b/rng_state_9.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e2cb9fb5ceac79acf0e1041ec03a0e281ae5eb3328f501370ea68e7b27c7fc5b
+size 21687
diff --git a/special_tokens_map.json b/special_tokens_map.json
new file mode 100644
index 0000000000000000000000000000000000000000..1023d3510a8f6a72f071f934b9319d2b3dba616e
--- /dev/null
+++ b/special_tokens_map.json
@@ -0,0 +1,38 @@
+{
+ "additional_special_tokens": [
+ "<|im_start|>",
+ "<|im_end|>",
+ "<|action_start|>",
+ "<|action_end|>",
+ "<|interpreter|>",
+ "<|plugin|>"
+ ],
+ "bos_token": {
+ "content": "<s>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "eos_token": {
+ "content": "</s>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "pad_token": {
+ "content": "</s>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "unk_token": {
+ "content": "<unk>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ }
+}
diff --git a/tokenization_internlm2.py b/tokenization_internlm2.py
new file mode 100644
index 0000000000000000000000000000000000000000..ff53eba214b313c86ef489a823167a7ef2b52c09
--- /dev/null
+++ b/tokenization_internlm2.py
@@ -0,0 +1,236 @@
+# coding=utf-8
+# Copyright (c) The InternLM team and The HuggingFace Inc. team. All rights reserved.
+#
+# This code is based on transformers/src/transformers/models/llama/tokenization_llama.py
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Tokenization classes for InternLM."""
+import os
+from shutil import copyfile
+from typing import Any, Dict, List, Optional, Tuple
+
+import sentencepiece as spm
+from transformers.tokenization_utils import PreTrainedTokenizer
+from transformers.utils import logging
+
+# Module-level logger obtained from the transformers logging utilities.
+logger = logging.get_logger(__name__)
+
+# Maps the `vocab_file` constructor argument to the file name of the vocabulary file.
+VOCAB_FILES_NAMES = {"vocab_file": "./tokenizer.model"}
+
+# No pretrained vocabulary locations are registered for this tokenizer.
+PRETRAINED_VOCAB_FILES_MAP = {}
+
+
+# Modified from transformers.model.llama.tokenization_llama.LlamaTokenizer
+class InternLM2Tokenizer(PreTrainedTokenizer):
+    """
+    Construct an InternLM2 tokenizer backed by a SentencePiece model.
+
+    Args:
+        vocab_file (`str`):
+            Path to the vocabulary file; it is loaded with `SentencePieceProcessor.Load`.
+        unk_token (`str`, *optional*):
+            Unknown token, forwarded to the base class constructor.
+        bos_token (`str`, *optional*):
+            Beginning-of-sequence token, forwarded to the base class constructor.
+        eos_token (`str`, *optional*):
+            End-of-sequence token, forwarded to the base class constructor.
+        pad_token (`str`, *optional*):
+            Padding token, forwarded to the base class constructor.
+        sp_model_kwargs (`Dict[str, Any]`, *optional*):
+            Keyword arguments passed to the `SentencePieceProcessor` constructor.
+        add_bos_token (`bool`, *optional*, defaults to `True`):
+            Whether `build_inputs_with_special_tokens` prepends the BOS id.
+        add_eos_token (`bool`, *optional*, defaults to `False`):
+            Whether `build_inputs_with_special_tokens` appends the EOS id.
+        decode_with_prefix_space (`bool`, *optional*, defaults to `False`):
+            Stored on the instance; it is not read anywhere else in this class.
+        clean_up_tokenization_spaces (`bool`, *optional*, defaults to `False`):
+            Forwarded to the base class constructor.
+    """
+
+    vocab_files_names = VOCAB_FILES_NAMES
+    pretrained_vocab_files_map = PRETRAINED_VOCAB_FILES_MAP
+    model_input_names = ["input_ids", "attention_mask"]
+    _auto_class = "AutoTokenizer"
+
+    def __init__(
+        self,
+        vocab_file,
+        # NOTE(review): the empty-string defaults below look like stripped
+        # `<unk>` / `<s>` / `</s>` literals -- confirm against the upstream tokenizer.
+        unk_token="",
+        bos_token="",
+        eos_token="",
+        pad_token="",
+        sp_model_kwargs: Optional[Dict[str, Any]] = None,
+        add_bos_token=True,
+        add_eos_token=False,
+        decode_with_prefix_space=False,
+        clean_up_tokenization_spaces=False,
+        **kwargs,
+    ):
+        self.sp_model_kwargs = {} if sp_model_kwargs is None else sp_model_kwargs
+        self.vocab_file = vocab_file
+        self.add_bos_token = add_bos_token
+        self.add_eos_token = add_eos_token
+        self.decode_with_prefix_space = decode_with_prefix_space
+        # Load the SentencePiece model before delegating to the base constructor.
+        self.sp_model = spm.SentencePieceProcessor(**self.sp_model_kwargs)
+        self.sp_model.Load(vocab_file)
+        # Lazily filled cache for the `no_prefix_space_tokens` property.
+        self._no_prefix_space_tokens = None
+        super().__init__(
+            bos_token=bos_token,
+            eos_token=eos_token,
+            unk_token=unk_token,
+            pad_token=pad_token,
+            clean_up_tokenization_spaces=clean_up_tokenization_spaces,
+            **kwargs,
+        )
+
+    @property
+    def no_prefix_space_tokens(self):
+        """Set of ids whose piece does not start with "▁"; computed on first access and cached."""
+        if self._no_prefix_space_tokens is None:
+            vocab = self.convert_ids_to_tokens(list(range(self.vocab_size)))
+            self._no_prefix_space_tokens = {i for i, tok in enumerate(vocab) if not tok.startswith("▁")}
+        return self._no_prefix_space_tokens
+
+    @property
+    def vocab_size(self):
+        """Returns the number of pieces in the SentencePiece model."""
+        return self.sp_model.get_piece_size()
+
+    @property
+    def bos_token_id(self) -> Optional[int]:
+        """Returns the BOS id reported by the SentencePiece model."""
+        return self.sp_model.bos_id()
+
+    @property
+    def eos_token_id(self) -> Optional[int]:
+        """Returns the EOS id reported by the SentencePiece model."""
+        return self.sp_model.eos_id()
+
+    def get_vocab(self):
+        """Returns the vocab as a dict of token to id, including the added tokens."""
+        vocab = {self.convert_ids_to_tokens(i): i for i in range(self.vocab_size)}
+        vocab.update(self.added_tokens_encoder)
+        return vocab
+
+    def _tokenize(self, text):
+        """Returns the SentencePiece pieces (as strings) for `text`."""
+        return self.sp_model.encode(text, out_type=str)
+
+    def _convert_token_to_id(self, token):
+        """Converts a token (str) to an id using the SentencePiece vocab."""
+        return self.sp_model.piece_to_id(token)
+
+    def _convert_id_to_token(self, index):
+        """Converts an index (integer) to a token (str) using the SentencePiece vocab."""
+        return self.sp_model.IdToPiece(index)
+
+    def _maybe_add_prefix_space(self, tokens, decoded):
+        """Returns `decoded` with a leading space unless `tokens` is empty or starts with a no-prefix-space entry."""
+        # NOTE(review): `no_prefix_space_tokens` holds integer ids while
+        # `convert_tokens_to_string` passes string tokens, so this membership test is
+        # true for every non-empty input. That caller strips the first character
+        # again, so the two steps cancel out; do not change one without the other.
+        if tokens and tokens[0] not in self.no_prefix_space_tokens:
+            return " " + decoded
+        return decoded
+
+    def convert_tokens_to_string(self, tokens):
+        """Converts a sequence of tokens (strings) into a single string."""
+        # Read the special-token list once instead of on every loop iteration.
+        special_tokens = set(self.all_special_tokens)
+        current_sub_tokens = []
+        out_string = ""
+        prev_is_special = False
+        for token in tokens:
+            # make sure that special tokens are not decoded using sentencepiece model
+            if token in special_tokens:
+                if not prev_is_special:
+                    out_string += " "
+                out_string += self.sp_model.decode(current_sub_tokens) + token
+                prev_is_special = True
+                current_sub_tokens = []
+            else:
+                current_sub_tokens.append(token)
+                prev_is_special = False
+        out_string += self.sp_model.decode(current_sub_tokens)
+        out_string = self.clean_up_tokenization(out_string)
+        out_string = self._maybe_add_prefix_space(tokens=tokens, decoded=out_string)
+        # Drop the first character: for non-empty input it is the space that
+        # `_maybe_add_prefix_space` has just prepended.
+        return out_string[1:]
+
+    def save_vocabulary(self, save_directory, filename_prefix: Optional[str] = None) -> Tuple[str]:
+        """
+        Save the vocabulary file to a directory.
+
+        Args:
+            save_directory (`str`):
+                The directory in which to save the vocabulary.
+            filename_prefix (`str`, *optional*):
+                Prefix joined to the vocabulary file name with a "-".
+
+        Returns:
+            `Tuple(str)`: Paths to the files saved, or `None` (after logging an error) when `save_directory` is not
+            a directory.
+        """
+        if not os.path.isdir(save_directory):
+            logger.error(f"Vocabulary path ({save_directory}) should be a directory")
+            return
+        # Use the base name so that a prefix yields "prefix-tokenizer.model" instead of
+        # "prefix-./tokenizer.model", which would point into a "prefix-." subdirectory.
+        vocab_name = os.path.basename(VOCAB_FILES_NAMES["vocab_file"])
+        out_vocab_file = os.path.join(save_directory, (filename_prefix + "-" if filename_prefix else "") + vocab_name)
+
+        if os.path.abspath(self.vocab_file) != os.path.abspath(out_vocab_file) and os.path.isfile(self.vocab_file):
+            copyfile(self.vocab_file, out_vocab_file)
+        elif not os.path.isfile(self.vocab_file):
+            # The source file is gone: write out the model held in memory instead.
+            with open(out_vocab_file, "wb") as fi:
+                fi.write(self.sp_model.serialized_model_proto())
+
+        return (out_vocab_file,)
+
+    def build_inputs_with_special_tokens(self, token_ids_0, token_ids_1=None):
+        """
+        Build model inputs as `[BOS] + token_ids_0 (+ token_ids_1) + [EOS]`.
+
+        BOS is added only when `add_bos_token` is set and EOS only when `add_eos_token` is set. No separator is
+        inserted between the two sequences.
+        """
+        bos_token_ids = [self.bos_token_id] if self.add_bos_token else []
+        output = bos_token_ids + token_ids_0
+
+        if token_ids_1 is not None:
+            output = output + token_ids_1
+
+        if self.add_eos_token:
+            output = output + [self.eos_token_id]
+
+        return output
+
+    def get_special_tokens_mask(
+        self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None, already_has_special_tokens: bool = False
+    ) -> List[int]:
+        """
+        Build a mask marking the positions that `build_inputs_with_special_tokens` fills with special tokens.
+
+        Args:
+            token_ids_0 (`List[int]`):
+                List of IDs.
+            token_ids_1 (`List[int]`, *optional*):
+                Optional second list of IDs for sequence pairs.
+            already_has_special_tokens (`bool`, *optional*, defaults to `False`):
+                Whether or not the token list is already formatted with special tokens for the model.
+
+        Returns:
+            `List[int]`: A list of integers in the range [0, 1]: 1 for a special token, 0 for a sequence token.
+        """
+        if already_has_special_tokens:
+            return super().get_special_tokens_mask(
+                token_ids_0=token_ids_0, token_ids_1=token_ids_1, already_has_special_tokens=True
+            )
+
+        # Mirror build_inputs_with_special_tokens so the mask has exactly the same
+        # length and layout as the ids it describes.
+        bos_mask = [1] if self.add_bos_token else []
+        eos_mask = [1] if self.add_eos_token else []
+        num_sequence_tokens = len(token_ids_0) + (len(token_ids_1) if token_ids_1 is not None else 0)
+        return bos_mask + [0] * num_sequence_tokens + eos_mask
+
+    def create_token_type_ids_from_sequences(
+        self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None
+    ) -> List[int]:
+        """
+        Create the token type ids for the sequence(s) passed. This tokenizer does not distinguish segments, so a
+        list of zeros as long as the output of `build_inputs_with_special_tokens` is returned.
+
+        Args:
+            token_ids_0 (`List[int]`):
+                List of IDs.
+            token_ids_1 (`List[int]`, *optional*):
+                Optional second list of IDs for sequence pairs.
+
+        Returns:
+            `List[int]`: List of zeros.
+        """
+        # Count BOS/EOS only when build_inputs_with_special_tokens would add them.
+        num_special_tokens = int(bool(self.add_bos_token)) + int(bool(self.add_eos_token))
+        num_sequence_tokens = len(token_ids_0) + (len(token_ids_1) if token_ids_1 is not None else 0)
+        return [0] * (num_special_tokens + num_sequence_tokens)
diff --git a/tokenizer.model b/tokenizer.model
new file mode 100644
index 0000000000000000000000000000000000000000..6600712949ca9c4ffb50f25275993a21fba0b408
--- /dev/null
+++ b/tokenizer.model
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f868398fc4e05ee1e8aeba95ddf18ddcc45b8bce55d5093bead5bbf80429b48b
+size 1477754
diff --git a/tokenizer_config.json b/tokenizer_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..c09328e63848220f5be589fd72e139f38ecbf5fb
--- /dev/null
+++ b/tokenizer_config.json
@@ -0,0 +1,99 @@
+{
+ "added_tokens_decoder": {
+ "0": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "1": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "2": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "92538": {
+ "content": "<|plugin|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "92539": {
+ "content": "<|interpreter|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "92540": {
+ "content": "<|action_end|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "92541": {
+ "content": "<|action_start|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "92542": {
+ "content": "<|im_end|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "92543": {
+ "content": "<|im_start|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ }
+ },
+ "additional_special_tokens": [
+ "<|im_start|>",
+ "<|im_end|>",
+ "<|action_start|>",
+ "<|action_end|>",
+ "<|interpreter|>",
+ "<|plugin|>"
+ ],
+ "auto_map": {
+ "AutoTokenizer": [
+ "tokenization_internlm2.InternLM2Tokenizer",
+ null
+ ]
+ },
+ "bos_token": "",
+ "chat_template": "{{ bos_token }}{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
+ "clean_up_tokenization_spaces": false,
+ "eos_token": "",
+ "model_max_length": 1000000000000000019884624838656,
+ "pad_token": "",
+ "padding_side": "right",
+ "tokenizer_class": "InternLM2Tokenizer",
+ "unk_token": ""
+}
diff --git a/trainer_state.json b/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..4beb7891b7c059b9b79a20051cbe1682cbb24bcf
--- /dev/null
+++ b/trainer_state.json
@@ -0,0 +1,41659 @@
+{
+ "best_metric": null,
+ "best_model_checkpoint": null,
+ "epoch": 5.0,
+ "eval_steps": 10000,
+ "global_step": 5205,
+ "is_hyper_param_search": false,
+ "is_local_process_zero": true,
+ "is_world_process_zero": true,
+ "log_history": [
+ {
+ "epoch": 0.0,
+ "learning_rate": "3.8388e-07",
+ "loss": 7.8236,
+ "slid_loss": 7.8236,
+ "step": 1,
+ "time": 30.42
+ },
+ {
+ "epoch": 0.0,
+ "learning_rate": "7.6775e-07",
+ "loss": 7.8141,
+ "slid_loss": 7.8189,
+ "step": 2,
+ "time": 13.68
+ },
+ {
+ "epoch": 0.0,
+ "learning_rate": "1.1516e-06",
+ "loss": 7.9123,
+ "slid_loss": 7.85,
+ "step": 3,
+ "time": 14.51
+ },
+ {
+ "epoch": 0.0,
+ "learning_rate": "1.5355e-06",
+ "loss": 8.4664,
+ "slid_loss": 8.0041,
+ "step": 4,
+ "time": 14.12
+ },
+ {
+ "epoch": 0.0,
+ "learning_rate": "1.9194e-06",
+ "loss": 7.7963,
+ "slid_loss": 7.9625,
+ "step": 5,
+ "time": 11.58
+ },
+ {
+ "epoch": 0.01,
+ "learning_rate": "2.3033e-06",
+ "loss": 8.1496,
+ "slid_loss": 7.9937,
+ "step": 6,
+ "time": 12.27
+ },
+ {
+ "epoch": 0.01,
+ "learning_rate": "2.6871e-06",
+ "loss": 7.9949,
+ "slid_loss": 7.9939,
+ "step": 7,
+ "time": 12.94
+ },
+ {
+ "epoch": 0.01,
+ "learning_rate": "3.0710e-06",
+ "loss": 8.1081,
+ "slid_loss": 8.0082,
+ "step": 8,
+ "time": 14.3
+ },
+ {
+ "epoch": 0.01,
+ "learning_rate": "3.4549e-06",
+ "loss": 8.0067,
+ "slid_loss": 8.008,
+ "step": 9,
+ "time": 13.35
+ },
+ {
+ "epoch": 0.01,
+ "learning_rate": "3.8388e-06",
+ "loss": 7.3789,
+ "slid_loss": 7.9451,
+ "step": 10,
+ "time": 13.36
+ },
+ {
+ "epoch": 0.01,
+ "learning_rate": "4.2226e-06",
+ "loss": 7.1119,
+ "slid_loss": 7.8694,
+ "step": 11,
+ "time": 12.62
+ },
+ {
+ "epoch": 0.01,
+ "learning_rate": "4.6065e-06",
+ "loss": 7.1156,
+ "slid_loss": 7.8065,
+ "step": 12,
+ "time": 14.07
+ },
+ {
+ "epoch": 0.01,
+ "learning_rate": "4.9904e-06",
+ "loss": 6.4367,
+ "slid_loss": 7.7012,
+ "step": 13,
+ "time": 12.71
+ },
+ {
+ "epoch": 0.01,
+ "learning_rate": "5.3743e-06",
+ "loss": 6.3016,
+ "slid_loss": 7.6012,
+ "step": 14,
+ "time": 13.09
+ },
+ {
+ "epoch": 0.01,
+ "learning_rate": "5.7582e-06",
+ "loss": 6.4107,
+ "slid_loss": 7.5218,
+ "step": 15,
+ "time": 13.66
+ },
+ {
+ "epoch": 0.02,
+ "learning_rate": "6.1420e-06",
+ "loss": 5.4338,
+ "slid_loss": 7.3913,
+ "step": 16,
+ "time": 11.62
+ },
+ {
+ "epoch": 0.02,
+ "learning_rate": "6.5259e-06",
+ "loss": 5.1555,
+ "slid_loss": 7.2598,
+ "step": 17,
+ "time": 13.01
+ },
+ {
+ "epoch": 0.02,
+ "learning_rate": "6.9098e-06",
+ "loss": 5.268,
+ "slid_loss": 7.1492,
+ "step": 18,
+ "time": 11.91
+ },
+ {
+ "epoch": 0.02,
+ "learning_rate": "7.2937e-06",
+ "loss": 5.3034,
+ "slid_loss": 7.052,
+ "step": 19,
+ "time": 12.71
+ },
+ {
+ "epoch": 0.02,
+ "learning_rate": "7.6775e-06",
+ "loss": 5.1647,
+ "slid_loss": 6.9577,
+ "step": 20,
+ "time": 13.25
+ },
+ {
+ "epoch": 0.02,
+ "learning_rate": "8.0614e-06",
+ "loss": 4.853,
+ "slid_loss": 6.8574,
+ "step": 21,
+ "time": 11.64
+ },
+ {
+ "epoch": 0.02,
+ "learning_rate": "8.4453e-06",
+ "loss": 4.031,
+ "slid_loss": 6.729,
+ "step": 22,
+ "time": 13.73
+ },
+ {
+ "epoch": 0.02,
+ "learning_rate": "8.8292e-06",
+ "loss": 4.4817,
+ "slid_loss": 6.6312,
+ "step": 23,
+ "time": 11.47
+ },
+ {
+ "epoch": 0.02,
+ "learning_rate": "9.2131e-06",
+ "loss": 3.9349,
+ "slid_loss": 6.5189,
+ "step": 24,
+ "time": 13.88
+ },
+ {
+ "epoch": 0.02,
+ "learning_rate": "9.5969e-06",
+ "loss": 3.6992,
+ "slid_loss": 6.4061,
+ "step": 25,
+ "time": 11.17
+ },
+ {
+ "epoch": 0.02,
+ "learning_rate": "9.9808e-06",
+ "loss": 3.3853,
+ "slid_loss": 6.2899,
+ "step": 26,
+ "time": 13.64
+ },
+ {
+ "epoch": 0.03,
+ "learning_rate": "1.0365e-05",
+ "loss": 3.2214,
+ "slid_loss": 6.1763,
+ "step": 27,
+ "time": 13.83
+ },
+ {
+ "epoch": 0.03,
+ "learning_rate": "1.0749e-05",
+ "loss": 2.7486,
+ "slid_loss": 6.0539,
+ "step": 28,
+ "time": 13.78
+ },
+ {
+ "epoch": 0.03,
+ "learning_rate": "1.1132e-05",
+ "loss": 3.111,
+ "slid_loss": 5.9524,
+ "step": 29,
+ "time": 14.31
+ },
+ {
+ "epoch": 0.03,
+ "learning_rate": "1.1516e-05",
+ "loss": 3.0806,
+ "slid_loss": 5.8566,
+ "step": 30,
+ "time": 12.18
+ },
+ {
+ "epoch": 0.03,
+ "learning_rate": "1.1900e-05",
+ "loss": 2.5142,
+ "slid_loss": 5.7488,
+ "step": 31,
+ "time": 13.03
+ },
+ {
+ "epoch": 0.03,
+ "learning_rate": "1.2284e-05",
+ "loss": 2.3965,
+ "slid_loss": 5.6441,
+ "step": 32,
+ "time": 13.99
+ },
+ {
+ "epoch": 0.03,
+ "learning_rate": "1.2668e-05",
+ "loss": 2.3281,
+ "slid_loss": 5.5436,
+ "step": 33,
+ "time": 13.43
+ },
+ {
+ "epoch": 0.03,
+ "learning_rate": "1.3052e-05",
+ "loss": 2.4237,
+ "slid_loss": 5.4518,
+ "step": 34,
+ "time": 13.29
+ },
+ {
+ "epoch": 0.03,
+ "learning_rate": "1.3436e-05",
+ "loss": 2.3792,
+ "slid_loss": 5.364,
+ "step": 35,
+ "time": 13.98
+ },
+ {
+ "epoch": 0.03,
+ "learning_rate": "1.3820e-05",
+ "loss": 2.2186,
+ "slid_loss": 5.2767,
+ "step": 36,
+ "time": 14.22
+ },
+ {
+ "epoch": 0.04,
+ "learning_rate": "1.4203e-05",
+ "loss": 2.1564,
+ "slid_loss": 5.1923,
+ "step": 37,
+ "time": 12.22
+ },
+ {
+ "epoch": 0.04,
+ "learning_rate": "1.4587e-05",
+ "loss": 1.8433,
+ "slid_loss": 5.1042,
+ "step": 38,
+ "time": 13.66
+ },
+ {
+ "epoch": 0.04,
+ "learning_rate": "1.4971e-05",
+ "loss": 1.9437,
+ "slid_loss": 5.0232,
+ "step": 39,
+ "time": 15.1
+ },
+ {
+ "epoch": 0.04,
+ "learning_rate": "1.5355e-05",
+ "loss": 1.9369,
+ "slid_loss": 4.946,
+ "step": 40,
+ "time": 13.07
+ },
+ {
+ "epoch": 0.04,
+ "learning_rate": "1.5739e-05",
+ "loss": 1.612,
+ "slid_loss": 4.8647,
+ "step": 41,
+ "time": 11.44
+ },
+ {
+ "epoch": 0.04,
+ "learning_rate": "1.6123e-05",
+ "loss": 1.6152,
+ "slid_loss": 4.7873,
+ "step": 42,
+ "time": 13.72
+ },
+ {
+ "epoch": 0.04,
+ "learning_rate": "1.6507e-05",
+ "loss": 1.6263,
+ "slid_loss": 4.7138,
+ "step": 43,
+ "time": 11.49
+ },
+ {
+ "epoch": 0.04,
+ "learning_rate": "1.6891e-05",
+ "loss": 1.4779,
+ "slid_loss": 4.6403,
+ "step": 44,
+ "time": 13.17
+ },
+ {
+ "epoch": 0.04,
+ "learning_rate": "1.7274e-05",
+ "loss": 1.5903,
+ "slid_loss": 4.5725,
+ "step": 45,
+ "time": 13.66
+ },
+ {
+ "epoch": 0.04,
+ "learning_rate": "1.7658e-05",
+ "loss": 1.4481,
+ "slid_loss": 4.5046,
+ "step": 46,
+ "time": 13.07
+ },
+ {
+ "epoch": 0.05,
+ "learning_rate": "1.8042e-05",
+ "loss": 1.4874,
+ "slid_loss": 4.4404,
+ "step": 47,
+ "time": 13.28
+ },
+ {
+ "epoch": 0.05,
+ "learning_rate": "1.8426e-05",
+ "loss": 1.3457,
+ "slid_loss": 4.3759,
+ "step": 48,
+ "time": 13.65
+ },
+ {
+ "epoch": 0.05,
+ "learning_rate": "1.8810e-05",
+ "loss": 1.4239,
+ "slid_loss": 4.3156,
+ "step": 49,
+ "time": 13.19
+ },
+ {
+ "epoch": 0.05,
+ "learning_rate": "1.9194e-05",
+ "loss": 1.2666,
+ "slid_loss": 4.2547,
+ "step": 50,
+ "time": 13.28
+ },
+ {
+ "epoch": 0.05,
+ "learning_rate": "1.9578e-05",
+ "loss": 1.2803,
+ "slid_loss": 4.1963,
+ "step": 51,
+ "time": 13.66
+ },
+ {
+ "epoch": 0.05,
+ "learning_rate": "1.9962e-05",
+ "loss": 1.4732,
+ "slid_loss": 4.144,
+ "step": 52,
+ "time": 12.77
+ },
+ {
+ "epoch": 0.05,
+ "learning_rate": "2.0345e-05",
+ "loss": 1.2897,
+ "slid_loss": 4.0901,
+ "step": 53,
+ "time": 13.85
+ },
+ {
+ "epoch": 0.05,
+ "learning_rate": "2.0729e-05",
+ "loss": 1.3271,
+ "slid_loss": 4.039,
+ "step": 54,
+ "time": 13.62
+ },
+ {
+ "epoch": 0.05,
+ "learning_rate": "2.1113e-05",
+ "loss": 1.3042,
+ "slid_loss": 3.9892,
+ "step": 55,
+ "time": 12.89
+ },
+ {
+ "epoch": 0.05,
+ "learning_rate": "2.1497e-05",
+ "loss": 1.2658,
+ "slid_loss": 3.9406,
+ "step": 56,
+ "time": 13.93
+ },
+ {
+ "epoch": 0.05,
+ "learning_rate": "2.1881e-05",
+ "loss": 1.2915,
+ "slid_loss": 3.8941,
+ "step": 57,
+ "time": 14.48
+ },
+ {
+ "epoch": 0.06,
+ "learning_rate": "2.2265e-05",
+ "loss": 1.2305,
+ "slid_loss": 3.8482,
+ "step": 58,
+ "time": 13.53
+ },
+ {
+ "epoch": 0.06,
+ "learning_rate": "2.2649e-05",
+ "loss": 1.2974,
+ "slid_loss": 3.805,
+ "step": 59,
+ "time": 11.61
+ },
+ {
+ "epoch": 0.06,
+ "learning_rate": "2.3033e-05",
+ "loss": 1.2902,
+ "slid_loss": 3.7631,
+ "step": 60,
+ "time": 14.14
+ },
+ {
+ "epoch": 0.06,
+ "learning_rate": "2.3417e-05",
+ "loss": 1.3371,
+ "slid_loss": 3.7233,
+ "step": 61,
+ "time": 13.32
+ },
+ {
+ "epoch": 0.06,
+ "learning_rate": "2.3800e-05",
+ "loss": 1.3215,
+ "slid_loss": 3.6845,
+ "step": 62,
+ "time": 13.18
+ },
+ {
+ "epoch": 0.06,
+ "learning_rate": "2.4184e-05",
+ "loss": 1.2516,
+ "slid_loss": 3.6459,
+ "step": 63,
+ "time": 13.15
+ },
+ {
+ "epoch": 0.06,
+ "learning_rate": "2.4568e-05",
+ "loss": 1.2789,
+ "slid_loss": 3.6089,
+ "step": 64,
+ "time": 11.21
+ },
+ {
+ "epoch": 0.06,
+ "learning_rate": "2.4952e-05",
+ "loss": 1.2642,
+ "slid_loss": 3.5729,
+ "step": 65,
+ "time": 14.3
+ },
+ {
+ "epoch": 0.06,
+ "learning_rate": "2.5336e-05",
+ "loss": 1.277,
+ "slid_loss": 3.5381,
+ "step": 66,
+ "time": 13.22
+ },
+ {
+ "epoch": 0.06,
+ "learning_rate": "2.5720e-05",
+ "loss": 1.4256,
+ "slid_loss": 3.5066,
+ "step": 67,
+ "time": 13.47
+ },
+ {
+ "epoch": 0.07,
+ "learning_rate": "2.6104e-05",
+ "loss": 1.2143,
+ "slid_loss": 3.4728,
+ "step": 68,
+ "time": 12.97
+ },
+ {
+ "epoch": 0.07,
+ "learning_rate": "2.6488e-05",
+ "loss": 1.1297,
+ "slid_loss": 3.4389,
+ "step": 69,
+ "time": 13.74
+ },
+ {
+ "epoch": 0.07,
+ "learning_rate": "2.6871e-05",
+ "loss": 1.2613,
+ "slid_loss": 3.4078,
+ "step": 70,
+ "time": 13.64
+ },
+ {
+ "epoch": 0.07,
+ "learning_rate": "2.7255e-05",
+ "loss": 1.1756,
+ "slid_loss": 3.3763,
+ "step": 71,
+ "time": 14.4
+ },
+ {
+ "epoch": 0.07,
+ "learning_rate": "2.7639e-05",
+ "loss": 1.2345,
+ "slid_loss": 3.3466,
+ "step": 72,
+ "time": 12.93
+ },
+ {
+ "epoch": 0.07,
+ "learning_rate": "2.8023e-05",
+ "loss": 1.1747,
+ "slid_loss": 3.3168,
+ "step": 73,
+ "time": 11.38
+ },
+ {
+ "epoch": 0.07,
+ "learning_rate": "2.8407e-05",
+ "loss": 1.1257,
+ "slid_loss": 3.2872,
+ "step": 74,
+ "time": 13.68
+ },
+ {
+ "epoch": 0.07,
+ "learning_rate": "2.8791e-05",
+ "loss": 1.3381,
+ "slid_loss": 3.2612,
+ "step": 75,
+ "time": 12.83
+ },
+ {
+ "epoch": 0.07,
+ "learning_rate": "2.9175e-05",
+ "loss": 1.1701,
+ "slid_loss": 3.2337,
+ "step": 76,
+ "time": 13.48
+ },
+ {
+ "epoch": 0.07,
+ "learning_rate": "2.9559e-05",
+ "loss": 1.1704,
+ "slid_loss": 3.2069,
+ "step": 77,
+ "time": 13.79
+ },
+ {
+ "epoch": 0.07,
+ "learning_rate": "2.9942e-05",
+ "loss": 1.2441,
+ "slid_loss": 3.1818,
+ "step": 78,
+ "time": 12.22
+ },
+ {
+ "epoch": 0.08,
+ "learning_rate": "3.0326e-05",
+ "loss": 1.2319,
+ "slid_loss": 3.1571,
+ "step": 79,
+ "time": 11.8
+ },
+ {
+ "epoch": 0.08,
+ "learning_rate": "3.0710e-05",
+ "loss": 1.2055,
+ "slid_loss": 3.1327,
+ "step": 80,
+ "time": 13.84
+ },
+ {
+ "epoch": 0.08,
+ "learning_rate": "3.1094e-05",
+ "loss": 1.1839,
+ "slid_loss": 3.1086,
+ "step": 81,
+ "time": 13.38
+ },
+ {
+ "epoch": 0.08,
+ "learning_rate": "3.1478e-05",
+ "loss": 1.2603,
+ "slid_loss": 3.0861,
+ "step": 82,
+ "time": 13.51
+ },
+ {
+ "epoch": 0.08,
+ "learning_rate": "3.1862e-05",
+ "loss": 1.0705,
+ "slid_loss": 3.0618,
+ "step": 83,
+ "time": 13.16
+ },
+ {
+ "epoch": 0.08,
+ "learning_rate": "3.2246e-05",
+ "loss": 1.236,
+ "slid_loss": 3.0401,
+ "step": 84,
+ "time": 13.83
+ },
+ {
+ "epoch": 0.08,
+ "learning_rate": "3.2630e-05",
+ "loss": 1.2927,
+ "slid_loss": 3.0195,
+ "step": 85,
+ "time": 13.91
+ },
+ {
+ "epoch": 0.08,
+ "learning_rate": "3.3013e-05",
+ "loss": 1.1653,
+ "slid_loss": 2.9979,
+ "step": 86,
+ "time": 13.88
+ },
+ {
+ "epoch": 0.08,
+ "learning_rate": "3.3397e-05",
+ "loss": 1.1917,
+ "slid_loss": 2.9772,
+ "step": 87,
+ "time": 13.44
+ },
+ {
+ "epoch": 0.08,
+ "learning_rate": "3.3781e-05",
+ "loss": 1.0867,
+ "slid_loss": 2.9557,
+ "step": 88,
+ "time": 13.41
+ },
+ {
+ "epoch": 0.09,
+ "learning_rate": "3.4165e-05",
+ "loss": 1.3115,
+ "slid_loss": 2.9372,
+ "step": 89,
+ "time": 13.74
+ },
+ {
+ "epoch": 0.09,
+ "learning_rate": "3.4549e-05",
+ "loss": 1.1756,
+ "slid_loss": 2.9177,
+ "step": 90,
+ "time": 13.91
+ },
+ {
+ "epoch": 0.09,
+ "learning_rate": "3.4933e-05",
+ "loss": 1.1516,
+ "slid_loss": 2.8982,
+ "step": 91,
+ "time": 12.35
+ },
+ {
+ "epoch": 0.09,
+ "learning_rate": "3.5317e-05",
+ "loss": 1.2829,
+ "slid_loss": 2.8807,
+ "step": 92,
+ "time": 13.4
+ },
+ {
+ "epoch": 0.09,
+ "learning_rate": "3.5701e-05",
+ "loss": 1.1734,
+ "slid_loss": 2.8623,
+ "step": 93,
+ "time": 13.1
+ },
+ {
+ "epoch": 0.09,
+ "learning_rate": "3.6084e-05",
+ "loss": 1.168,
+ "slid_loss": 2.8443,
+ "step": 94,
+ "time": 13.42
+ },
+ {
+ "epoch": 0.09,
+ "learning_rate": "3.6468e-05",
+ "loss": 1.1861,
+ "slid_loss": 2.8269,
+ "step": 95,
+ "time": 13.57
+ },
+ {
+ "epoch": 0.09,
+ "learning_rate": "3.6852e-05",
+ "loss": 1.1554,
+ "slid_loss": 2.8094,
+ "step": 96,
+ "time": 12.92
+ },
+ {
+ "epoch": 0.09,
+ "learning_rate": "3.7236e-05",
+ "loss": 1.0486,
+ "slid_loss": 2.7913,
+ "step": 97,
+ "time": 11.1
+ },
+ {
+ "epoch": 0.09,
+ "learning_rate": "3.7620e-05",
+ "loss": 1.1777,
+ "slid_loss": 2.7748,
+ "step": 98,
+ "time": 12.76
+ },
+ {
+ "epoch": 0.1,
+ "learning_rate": "3.8004e-05",
+ "loss": 1.0456,
+ "slid_loss": 2.7574,
+ "step": 99,
+ "time": 12.89
+ },
+ {
+ "epoch": 0.1,
+ "learning_rate": "3.8388e-05",
+ "loss": 1.1834,
+ "slid_loss": 2.7416,
+ "step": 100,
+ "time": 11.2
+ },
+ {
+ "epoch": 0.1,
+ "learning_rate": "3.8772e-05",
+ "loss": 1.2001,
+ "slid_loss": 2.6754,
+ "step": 101,
+ "time": 11.55
+ },
+ {
+ "epoch": 0.1,
+ "learning_rate": "3.9155e-05",
+ "loss": 1.1745,
+ "slid_loss": 2.609,
+ "step": 102,
+ "time": 13.0
+ },
+ {
+ "epoch": 0.1,
+ "learning_rate": "3.9539e-05",
+ "loss": 1.135,
+ "slid_loss": 2.5412,
+ "step": 103,
+ "time": 13.79
+ },
+ {
+ "epoch": 0.1,
+ "learning_rate": "3.9923e-05",
+ "loss": 1.3012,
+ "slid_loss": 2.4696,
+ "step": 104,
+ "time": 12.68
+ },
+ {
+ "epoch": 0.1,
+ "learning_rate": "4.0307e-05",
+ "loss": 1.0936,
+ "slid_loss": 2.4025,
+ "step": 105,
+ "time": 13.53
+ },
+ {
+ "epoch": 0.1,
+ "learning_rate": "4.0691e-05",
+ "loss": 1.3079,
+ "slid_loss": 2.3341,
+ "step": 106,
+ "time": 11.93
+ },
+ {
+ "epoch": 0.1,
+ "learning_rate": "4.1075e-05",
+ "loss": 1.2324,
+ "slid_loss": 2.2665,
+ "step": 107,
+ "time": 13.36
+ },
+ {
+ "epoch": 0.1,
+ "learning_rate": "4.1459e-05",
+ "loss": 1.1724,
+ "slid_loss": 2.1971,
+ "step": 108,
+ "time": 11.59
+ },
+ {
+ "epoch": 0.1,
+ "learning_rate": "4.1843e-05",
+ "loss": 1.0703,
+ "slid_loss": 2.1278,
+ "step": 109,
+ "time": 13.07
+ },
+ {
+ "epoch": 0.11,
+ "learning_rate": "4.2226e-05",
+ "loss": 1.103,
+ "slid_loss": 2.065,
+ "step": 110,
+ "time": 13.02
+ },
+ {
+ "epoch": 0.11,
+ "learning_rate": "4.2610e-05",
+ "loss": 1.2006,
+ "slid_loss": 2.0059,
+ "step": 111,
+ "time": 12.15
+ },
+ {
+ "epoch": 0.11,
+ "learning_rate": "4.2994e-05",
+ "loss": 1.2,
+ "slid_loss": 1.9467,
+ "step": 112,
+ "time": 11.78
+ },
+ {
+ "epoch": 0.11,
+ "learning_rate": "4.3378e-05",
+ "loss": 1.1365,
+ "slid_loss": 1.8937,
+ "step": 113,
+ "time": 14.08
+ },
+ {
+ "epoch": 0.11,
+ "learning_rate": "4.3762e-05",
+ "loss": 1.1729,
+ "slid_loss": 1.8425,
+ "step": 114,
+ "time": 12.99
+ },
+ {
+ "epoch": 0.11,
+ "learning_rate": "4.4146e-05",
+ "loss": 1.2536,
+ "slid_loss": 1.7909,
+ "step": 115,
+ "time": 13.67
+ },
+ {
+ "epoch": 0.11,
+ "learning_rate": "4.4530e-05",
+ "loss": 1.1477,
+ "slid_loss": 1.748,
+ "step": 116,
+ "time": 10.8
+ },
+ {
+ "epoch": 0.11,
+ "learning_rate": "4.4914e-05",
+ "loss": 1.1827,
+ "slid_loss": 1.7083,
+ "step": 117,
+ "time": 14.12
+ },
+ {
+ "epoch": 0.11,
+ "learning_rate": "4.5298e-05",
+ "loss": 1.0507,
+ "slid_loss": 1.6661,
+ "step": 118,
+ "time": 12.9
+ },
+ {
+ "epoch": 0.11,
+ "learning_rate": "4.5681e-05",
+ "loss": 1.2179,
+ "slid_loss": 1.6253,
+ "step": 119,
+ "time": 11.94
+ },
+ {
+ "epoch": 0.12,
+ "learning_rate": "4.6065e-05",
+ "loss": 1.0919,
+ "slid_loss": 1.5845,
+ "step": 120,
+ "time": 13.57
+ },
+ {
+ "epoch": 0.12,
+ "learning_rate": "4.6449e-05",
+ "loss": 1.1115,
+ "slid_loss": 1.5471,
+ "step": 121,
+ "time": 13.49
+ },
+ {
+ "epoch": 0.12,
+ "learning_rate": "4.6833e-05",
+ "loss": 1.1144,
+ "slid_loss": 1.518,
+ "step": 122,
+ "time": 13.82
+ },
+ {
+ "epoch": 0.12,
+ "learning_rate": "4.7217e-05",
+ "loss": 1.1727,
+ "slid_loss": 1.4849,
+ "step": 123,
+ "time": 12.95
+ },
+ {
+ "epoch": 0.12,
+ "learning_rate": "4.7601e-05",
+ "loss": 1.1816,
+ "slid_loss": 1.4573,
+ "step": 124,
+ "time": 13.23
+ },
+ {
+ "epoch": 0.12,
+ "learning_rate": "4.7985e-05",
+ "loss": 1.1854,
+ "slid_loss": 1.4322,
+ "step": 125,
+ "time": 13.22
+ },
+ {
+ "epoch": 0.12,
+ "learning_rate": "4.8369e-05",
+ "loss": 1.1493,
+ "slid_loss": 1.4098,
+ "step": 126,
+ "time": 12.84
+ },
+ {
+ "epoch": 0.12,
+ "learning_rate": "4.8752e-05",
+ "loss": 1.148,
+ "slid_loss": 1.3891,
+ "step": 127,
+ "time": 12.26
+ },
+ {
+ "epoch": 0.12,
+ "learning_rate": "4.9136e-05",
+ "loss": 1.1457,
+ "slid_loss": 1.3731,
+ "step": 128,
+ "time": 13.94
+ },
+ {
+ "epoch": 0.12,
+ "learning_rate": "4.9520e-05",
+ "loss": 1.0894,
+ "slid_loss": 1.3529,
+ "step": 129,
+ "time": 11.57
+ },
+ {
+ "epoch": 0.12,
+ "learning_rate": "4.9904e-05",
+ "loss": 1.1805,
+ "slid_loss": 1.3339,
+ "step": 130,
+ "time": 13.7
+ },
+ {
+ "epoch": 0.13,
+ "learning_rate": "5.0288e-05",
+ "loss": 1.2985,
+ "slid_loss": 1.3217,
+ "step": 131,
+ "time": 13.13
+ },
+ {
+ "epoch": 0.13,
+ "learning_rate": "5.0672e-05",
+ "loss": 1.2082,
+ "slid_loss": 1.3098,
+ "step": 132,
+ "time": 11.74
+ },
+ {
+ "epoch": 0.13,
+ "learning_rate": "5.1056e-05",
+ "loss": 1.1836,
+ "slid_loss": 1.2984,
+ "step": 133,
+ "time": 12.08
+ },
+ {
+ "epoch": 0.13,
+ "learning_rate": "5.1440e-05",
+ "loss": 1.0953,
+ "slid_loss": 1.2851,
+ "step": 134,
+ "time": 13.17
+ },
+ {
+ "epoch": 0.13,
+ "learning_rate": "5.1823e-05",
+ "loss": 1.1741,
+ "slid_loss": 1.273,
+ "step": 135,
+ "time": 13.01
+ },
+ {
+ "epoch": 0.13,
+ "learning_rate": "5.2207e-05",
+ "loss": 1.2184,
+ "slid_loss": 1.263,
+ "step": 136,
+ "time": 12.95
+ },
+ {
+ "epoch": 0.13,
+ "learning_rate": "5.2591e-05",
+ "loss": 0.9831,
+ "slid_loss": 1.2513,
+ "step": 137,
+ "time": 11.96
+ },
+ {
+ "epoch": 0.13,
+ "learning_rate": "5.2975e-05",
+ "loss": 1.1949,
+ "slid_loss": 1.2448,
+ "step": 138,
+ "time": 13.73
+ },
+ {
+ "epoch": 0.13,
+ "learning_rate": "5.3359e-05",
+ "loss": 1.0852,
+ "slid_loss": 1.2362,
+ "step": 139,
+ "time": 12.81
+ },
+ {
+ "epoch": 0.13,
+ "learning_rate": "5.3743e-05",
+ "loss": 1.1562,
+ "slid_loss": 1.2284,
+ "step": 140,
+ "time": 13.78
+ },
+ {
+ "epoch": 0.14,
+ "learning_rate": "5.4127e-05",
+ "loss": 1.3207,
+ "slid_loss": 1.2255,
+ "step": 141,
+ "time": 14.15
+ },
+ {
+ "epoch": 0.14,
+ "learning_rate": "5.4511e-05",
+ "loss": 1.2369,
+ "slid_loss": 1.2217,
+ "step": 142,
+ "time": 13.37
+ },
+ {
+ "epoch": 0.14,
+ "learning_rate": "5.4894e-05",
+ "loss": 1.1405,
+ "slid_loss": 1.2169,
+ "step": 143,
+ "time": 13.59
+ },
+ {
+ "epoch": 0.14,
+ "learning_rate": "5.5278e-05",
+ "loss": 1.1452,
+ "slid_loss": 1.2135,
+ "step": 144,
+ "time": 13.62
+ },
+ {
+ "epoch": 0.14,
+ "learning_rate": "5.5662e-05",
+ "loss": 1.3261,
+ "slid_loss": 1.2109,
+ "step": 145,
+ "time": 13.76
+ },
+ {
+ "epoch": 0.14,
+ "learning_rate": "5.6046e-05",
+ "loss": 1.1188,
+ "slid_loss": 1.2076,
+ "step": 146,
+ "time": 13.53
+ },
+ {
+ "epoch": 0.14,
+ "learning_rate": "5.6430e-05",
+ "loss": 1.1551,
+ "slid_loss": 1.2043,
+ "step": 147,
+ "time": 12.6
+ },
+ {
+ "epoch": 0.14,
+ "learning_rate": "5.6814e-05",
+ "loss": 1.1416,
+ "slid_loss": 1.2022,
+ "step": 148,
+ "time": 13.91
+ },
+ {
+ "epoch": 0.14,
+ "learning_rate": "5.7198e-05",
+ "loss": 1.1112,
+ "slid_loss": 1.1991,
+ "step": 149,
+ "time": 11.75
+ },
+ {
+ "epoch": 0.14,
+ "learning_rate": "5.7582e-05",
+ "loss": 1.1905,
+ "slid_loss": 1.1984,
+ "step": 150,
+ "time": 12.6
+ },
+ {
+ "epoch": 0.15,
+ "learning_rate": "5.7965e-05",
+ "loss": 1.1339,
+ "slid_loss": 1.1969,
+ "step": 151,
+ "time": 13.72
+ },
+ {
+ "epoch": 0.15,
+ "learning_rate": "5.8349e-05",
+ "loss": 1.1841,
+ "slid_loss": 1.194,
+ "step": 152,
+ "time": 13.36
+ },
+ {
+ "epoch": 0.15,
+ "learning_rate": "5.8733e-05",
+ "loss": 1.1096,
+ "slid_loss": 1.1922,
+ "step": 153,
+ "time": 13.26
+ },
+ {
+ "epoch": 0.15,
+ "learning_rate": "5.9117e-05",
+ "loss": 1.1083,
+ "slid_loss": 1.19,
+ "step": 154,
+ "time": 12.95
+ },
+ {
+ "epoch": 0.15,
+ "learning_rate": "5.9501e-05",
+ "loss": 1.0981,
+ "slid_loss": 1.188,
+ "step": 155,
+ "time": 12.87
+ },
+ {
+ "epoch": 0.15,
+ "learning_rate": "5.9885e-05",
+ "loss": 1.0515,
+ "slid_loss": 1.1858,
+ "step": 156,
+ "time": 12.8
+ },
+ {
+ "epoch": 0.15,
+ "learning_rate": "6.0269e-05",
+ "loss": 1.1457,
+ "slid_loss": 1.1844,
+ "step": 157,
+ "time": 14.29
+ },
+ {
+ "epoch": 0.15,
+ "learning_rate": "6.0653e-05",
+ "loss": 1.1588,
+ "slid_loss": 1.1836,
+ "step": 158,
+ "time": 12.79
+ },
+ {
+ "epoch": 0.15,
+ "learning_rate": "6.1036e-05",
+ "loss": 1.141,
+ "slid_loss": 1.1821,
+ "step": 159,
+ "time": 13.76
+ },
+ {
+ "epoch": 0.15,
+ "learning_rate": "6.1420e-05",
+ "loss": 1.2085,
+ "slid_loss": 1.1813,
+ "step": 160,
+ "time": 12.75
+ },
+ {
+ "epoch": 0.15,
+ "learning_rate": "6.1804e-05",
+ "loss": 1.0107,
+ "slid_loss": 1.178,
+ "step": 161,
+ "time": 13.2
+ },
+ {
+ "epoch": 0.16,
+ "learning_rate": "6.2188e-05",
+ "loss": 1.1931,
+ "slid_loss": 1.1767,
+ "step": 162,
+ "time": 13.93
+ },
+ {
+ "epoch": 0.16,
+ "learning_rate": "6.2572e-05",
+ "loss": 1.1413,
+ "slid_loss": 1.1756,
+ "step": 163,
+ "time": 12.96
+ },
+ {
+ "epoch": 0.16,
+ "learning_rate": "6.2956e-05",
+ "loss": 1.2096,
+ "slid_loss": 1.1749,
+ "step": 164,
+ "time": 13.31
+ },
+ {
+ "epoch": 0.16,
+ "learning_rate": "6.3340e-05",
+ "loss": 1.2266,
+ "slid_loss": 1.1745,
+ "step": 165,
+ "time": 13.7
+ },
+ {
+ "epoch": 0.16,
+ "learning_rate": "6.3724e-05",
+ "loss": 1.1511,
+ "slid_loss": 1.1733,
+ "step": 166,
+ "time": 13.25
+ },
+ {
+ "epoch": 0.16,
+ "learning_rate": "6.4107e-05",
+ "loss": 1.0903,
+ "slid_loss": 1.1699,
+ "step": 167,
+ "time": 13.46
+ },
+ {
+ "epoch": 0.16,
+ "learning_rate": "6.4491e-05",
+ "loss": 1.1374,
+ "slid_loss": 1.1692,
+ "step": 168,
+ "time": 13.69
+ },
+ {
+ "epoch": 0.16,
+ "learning_rate": "6.4875e-05",
+ "loss": 1.134,
+ "slid_loss": 1.1692,
+ "step": 169,
+ "time": 12.1
+ },
+ {
+ "epoch": 0.16,
+ "learning_rate": "6.5259e-05",
+ "loss": 1.2274,
+ "slid_loss": 1.1689,
+ "step": 170,
+ "time": 14.0
+ },
+ {
+ "epoch": 0.16,
+ "learning_rate": "6.5643e-05",
+ "loss": 1.0998,
+ "slid_loss": 1.1681,
+ "step": 171,
+ "time": 13.25
+ },
+ {
+ "epoch": 0.17,
+ "learning_rate": "6.6027e-05",
+ "loss": 1.1588,
+ "slid_loss": 1.1673,
+ "step": 172,
+ "time": 12.94
+ },
+ {
+ "epoch": 0.17,
+ "learning_rate": "6.6411e-05",
+ "loss": 1.1034,
+ "slid_loss": 1.1666,
+ "step": 173,
+ "time": 12.84
+ },
+ {
+ "epoch": 0.17,
+ "learning_rate": "6.6795e-05",
+ "loss": 1.1018,
+ "slid_loss": 1.1664,
+ "step": 174,
+ "time": 12.14
+ },
+ {
+ "epoch": 0.17,
+ "learning_rate": "6.7179e-05",
+ "loss": 1.2207,
+ "slid_loss": 1.1652,
+ "step": 175,
+ "time": 13.79
+ },
+ {
+ "epoch": 0.17,
+ "learning_rate": "6.7562e-05",
+ "loss": 1.1205,
+ "slid_loss": 1.1647,
+ "step": 176,
+ "time": 13.27
+ },
+ {
+ "epoch": 0.17,
+ "learning_rate": "6.7946e-05",
+ "loss": 1.0301,
+ "slid_loss": 1.1633,
+ "step": 177,
+ "time": 13.4
+ },
+ {
+ "epoch": 0.17,
+ "learning_rate": "6.8330e-05",
+ "loss": 1.0419,
+ "slid_loss": 1.1613,
+ "step": 178,
+ "time": 14.18
+ },
+ {
+ "epoch": 0.17,
+ "learning_rate": "6.8714e-05",
+ "loss": 1.0584,
+ "slid_loss": 1.1596,
+ "step": 179,
+ "time": 12.28
+ },
+ {
+ "epoch": 0.17,
+ "learning_rate": "6.9098e-05",
+ "loss": 1.1984,
+ "slid_loss": 1.1595,
+ "step": 180,
+ "time": 12.81
+ },
+ {
+ "epoch": 0.17,
+ "learning_rate": "6.9482e-05",
+ "loss": 1.1476,
+ "slid_loss": 1.1591,
+ "step": 181,
+ "time": 12.67
+ },
+ {
+ "epoch": 0.17,
+ "learning_rate": "6.9866e-05",
+ "loss": 1.0205,
+ "slid_loss": 1.1567,
+ "step": 182,
+ "time": 13.03
+ },
+ {
+ "epoch": 0.18,
+ "learning_rate": "7.0250e-05",
+ "loss": 1.1188,
+ "slid_loss": 1.1572,
+ "step": 183,
+ "time": 13.11
+ },
+ {
+ "epoch": 0.18,
+ "learning_rate": "7.0633e-05",
+ "loss": 1.1847,
+ "slid_loss": 1.1567,
+ "step": 184,
+ "time": 13.85
+ },
+ {
+ "epoch": 0.18,
+ "learning_rate": "7.1017e-05",
+ "loss": 1.1538,
+ "slid_loss": 1.1553,
+ "step": 185,
+ "time": 13.11
+ },
+ {
+ "epoch": 0.18,
+ "learning_rate": "7.1401e-05",
+ "loss": 1.1634,
+ "slid_loss": 1.1553,
+ "step": 186,
+ "time": 13.35
+ },
+ {
+ "epoch": 0.18,
+ "learning_rate": "7.1785e-05",
+ "loss": 1.0253,
+ "slid_loss": 1.1536,
+ "step": 187,
+ "time": 13.7
+ },
+ {
+ "epoch": 0.18,
+ "learning_rate": "7.2169e-05",
+ "loss": 1.0978,
+ "slid_loss": 1.1537,
+ "step": 188,
+ "time": 12.95
+ },
+ {
+ "epoch": 0.18,
+ "learning_rate": "7.2553e-05",
+ "loss": 1.1685,
+ "slid_loss": 1.1523,
+ "step": 189,
+ "time": 13.35
+ },
+ {
+ "epoch": 0.18,
+ "learning_rate": "7.2937e-05",
+ "loss": 1.1842,
+ "slid_loss": 1.1524,
+ "step": 190,
+ "time": 13.56
+ },
+ {
+ "epoch": 0.18,
+ "learning_rate": "7.3321e-05",
+ "loss": 1.1207,
+ "slid_loss": 1.1521,
+ "step": 191,
+ "time": 12.95
+ },
+ {
+ "epoch": 0.18,
+ "learning_rate": "7.3704e-05",
+ "loss": 1.1619,
+ "slid_loss": 1.1509,
+ "step": 192,
+ "time": 13.46
+ },
+ {
+ "epoch": 0.19,
+ "learning_rate": "7.4088e-05",
+ "loss": 1.1596,
+ "slid_loss": 1.1507,
+ "step": 193,
+ "time": 14.22
+ },
+ {
+ "epoch": 0.19,
+ "learning_rate": "7.4472e-05",
+ "loss": 1.1408,
+ "slid_loss": 1.1505,
+ "step": 194,
+ "time": 13.57
+ },
+ {
+ "epoch": 0.19,
+ "learning_rate": "7.4856e-05",
+ "loss": 1.2049,
+ "slid_loss": 1.1507,
+ "step": 195,
+ "time": 13.25
+ },
+ {
+ "epoch": 0.19,
+ "learning_rate": "7.5240e-05",
+ "loss": 1.192,
+ "slid_loss": 1.151,
+ "step": 196,
+ "time": 13.77
+ },
+ {
+ "epoch": 0.19,
+ "learning_rate": "7.5624e-05",
+ "loss": 1.05,
+ "slid_loss": 1.151,
+ "step": 197,
+ "time": 13.84
+ },
+ {
+ "epoch": 0.19,
+ "learning_rate": "7.6008e-05",
+ "loss": 1.1504,
+ "slid_loss": 1.1508,
+ "step": 198,
+ "time": 12.89
+ },
+ {
+ "epoch": 0.19,
+ "learning_rate": "7.6392e-05",
+ "loss": 1.1557,
+ "slid_loss": 1.1519,
+ "step": 199,
+ "time": 11.6
+ },
+ {
+ "epoch": 0.19,
+ "learning_rate": "7.6775e-05",
+ "loss": 1.1347,
+ "slid_loss": 1.1514,
+ "step": 200,
+ "time": 14.39
+ },
+ {
+ "epoch": 0.19,
+ "learning_rate": "7.7159e-05",
+ "loss": 1.0396,
+ "slid_loss": 1.1498,
+ "step": 201,
+ "time": 13.35
+ },
+ {
+ "epoch": 0.19,
+ "learning_rate": "7.7543e-05",
+ "loss": 1.0315,
+ "slid_loss": 1.1483,
+ "step": 202,
+ "time": 12.04
+ },
+ {
+ "epoch": 0.2,
+ "learning_rate": "7.7927e-05",
+ "loss": 1.0682,
+ "slid_loss": 1.1477,
+ "step": 203,
+ "time": 13.47
+ },
+ {
+ "epoch": 0.2,
+ "learning_rate": "7.8311e-05",
+ "loss": 1.0855,
+ "slid_loss": 1.1455,
+ "step": 204,
+ "time": 12.87
+ },
+ {
+ "epoch": 0.2,
+ "learning_rate": "7.8695e-05",
+ "loss": 1.2007,
+ "slid_loss": 1.1466,
+ "step": 205,
+ "time": 13.74
+ },
+ {
+ "epoch": 0.2,
+ "learning_rate": "7.9079e-05",
+ "loss": 1.1677,
+ "slid_loss": 1.1452,
+ "step": 206,
+ "time": 12.92
+ },
+ {
+ "epoch": 0.2,
+ "learning_rate": "7.9463e-05",
+ "loss": 1.225,
+ "slid_loss": 1.1451,
+ "step": 207,
+ "time": 12.72
+ },
+ {
+ "epoch": 0.2,
+ "learning_rate": "7.9846e-05",
+ "loss": 1.0762,
+ "slid_loss": 1.1441,
+ "step": 208,
+ "time": 13.34
+ },
+ {
+ "epoch": 0.2,
+ "learning_rate": "8.0230e-05",
+ "loss": 1.2651,
+ "slid_loss": 1.1461,
+ "step": 209,
+ "time": 11.94
+ },
+ {
+ "epoch": 0.2,
+ "learning_rate": "8.0614e-05",
+ "loss": 1.2334,
+ "slid_loss": 1.1474,
+ "step": 210,
+ "time": 13.82
+ },
+ {
+ "epoch": 0.2,
+ "learning_rate": "8.0998e-05",
+ "loss": 1.1817,
+ "slid_loss": 1.1472,
+ "step": 211,
+ "time": 13.56
+ },
+ {
+ "epoch": 0.2,
+ "learning_rate": "8.1382e-05",
+ "loss": 1.1834,
+ "slid_loss": 1.147,
+ "step": 212,
+ "time": 12.14
+ },
+ {
+ "epoch": 0.2,
+ "learning_rate": "8.1766e-05",
+ "loss": 1.0714,
+ "slid_loss": 1.1464,
+ "step": 213,
+ "time": 13.21
+ },
+ {
+ "epoch": 0.21,
+ "learning_rate": "8.2150e-05",
+ "loss": 1.1633,
+ "slid_loss": 1.1463,
+ "step": 214,
+ "time": 13.95
+ },
+ {
+ "epoch": 0.21,
+ "learning_rate": "8.2534e-05",
+ "loss": 1.1507,
+ "slid_loss": 1.1453,
+ "step": 215,
+ "time": 13.2
+ },
+ {
+ "epoch": 0.21,
+ "learning_rate": "8.2917e-05",
+ "loss": 1.1519,
+ "slid_loss": 1.1453,
+ "step": 216,
+ "time": 13.49
+ },
+ {
+ "epoch": 0.21,
+ "learning_rate": "8.3301e-05",
+ "loss": 1.2125,
+ "slid_loss": 1.1456,
+ "step": 217,
+ "time": 14.76
+ },
+ {
+ "epoch": 0.21,
+ "learning_rate": "8.3685e-05",
+ "loss": 1.178,
+ "slid_loss": 1.1469,
+ "step": 218,
+ "time": 13.13
+ },
+ {
+ "epoch": 0.21,
+ "learning_rate": "8.4069e-05",
+ "loss": 1.1489,
+ "slid_loss": 1.1462,
+ "step": 219,
+ "time": 13.25
+ },
+ {
+ "epoch": 0.21,
+ "learning_rate": "8.4453e-05",
+ "loss": 1.0851,
+ "slid_loss": 1.1461,
+ "step": 220,
+ "time": 13.56
+ },
+ {
+ "epoch": 0.21,
+ "learning_rate": "8.4837e-05",
+ "loss": 1.1002,
+ "slid_loss": 1.146,
+ "step": 221,
+ "time": 14.01
+ },
+ {
+ "epoch": 0.21,
+ "learning_rate": "8.5221e-05",
+ "loss": 1.1486,
+ "slid_loss": 1.1464,
+ "step": 222,
+ "time": 14.01
+ },
+ {
+ "epoch": 0.21,
+ "learning_rate": "8.5605e-05",
+ "loss": 1.2666,
+ "slid_loss": 1.1473,
+ "step": 223,
+ "time": 14.16
+ },
+ {
+ "epoch": 0.22,
+ "learning_rate": "8.5988e-05",
+ "loss": 0.9827,
+ "slid_loss": 1.1453,
+ "step": 224,
+ "time": 13.3
+ },
+ {
+ "epoch": 0.22,
+ "learning_rate": "8.6372e-05",
+ "loss": 1.2495,
+ "slid_loss": 1.1459,
+ "step": 225,
+ "time": 11.82
+ },
+ {
+ "epoch": 0.22,
+ "learning_rate": "8.6756e-05",
+ "loss": 1.0493,
+ "slid_loss": 1.1449,
+ "step": 226,
+ "time": 13.01
+ },
+ {
+ "epoch": 0.22,
+ "learning_rate": "8.7140e-05",
+ "loss": 1.1185,
+ "slid_loss": 1.1446,
+ "step": 227,
+ "time": 13.92
+ },
+ {
+ "epoch": 0.22,
+ "learning_rate": "8.7524e-05",
+ "loss": 1.1107,
+ "slid_loss": 1.1443,
+ "step": 228,
+ "time": 11.21
+ },
+ {
+ "epoch": 0.22,
+ "learning_rate": "8.7908e-05",
+ "loss": 1.1272,
+ "slid_loss": 1.1447,
+ "step": 229,
+ "time": 12.47
+ },
+ {
+ "epoch": 0.22,
+ "learning_rate": "8.8292e-05",
+ "loss": 1.1637,
+ "slid_loss": 1.1445,
+ "step": 230,
+ "time": 13.36
+ },
+ {
+ "epoch": 0.22,
+ "learning_rate": "8.8676e-05",
+ "loss": 1.1073,
+ "slid_loss": 1.1426,
+ "step": 231,
+ "time": 13.91
+ },
+ {
+ "epoch": 0.22,
+ "learning_rate": "8.9060e-05",
+ "loss": 1.1314,
+ "slid_loss": 1.1418,
+ "step": 232,
+ "time": 11.91
+ },
+ {
+ "epoch": 0.22,
+ "learning_rate": "8.9443e-05",
+ "loss": 1.1864,
+ "slid_loss": 1.1419,
+ "step": 233,
+ "time": 13.29
+ },
+ {
+ "epoch": 0.22,
+ "learning_rate": "8.9827e-05",
+ "loss": 1.161,
+ "slid_loss": 1.1425,
+ "step": 234,
+ "time": 11.8
+ },
+ {
+ "epoch": 0.23,
+ "learning_rate": "9.0211e-05",
+ "loss": 1.2014,
+ "slid_loss": 1.1428,
+ "step": 235,
+ "time": 13.69
+ },
+ {
+ "epoch": 0.23,
+ "learning_rate": "9.0595e-05",
+ "loss": 1.1488,
+ "slid_loss": 1.1421,
+ "step": 236,
+ "time": 13.2
+ },
+ {
+ "epoch": 0.23,
+ "learning_rate": "9.0979e-05",
+ "loss": 1.1874,
+ "slid_loss": 1.1441,
+ "step": 237,
+ "time": 13.69
+ },
+ {
+ "epoch": 0.23,
+ "learning_rate": "9.1363e-05",
+ "loss": 1.2081,
+ "slid_loss": 1.1443,
+ "step": 238,
+ "time": 12.94
+ },
+ {
+ "epoch": 0.23,
+ "learning_rate": "9.1747e-05",
+ "loss": 1.1443,
+ "slid_loss": 1.1449,
+ "step": 239,
+ "time": 13.6
+ },
+ {
+ "epoch": 0.23,
+ "learning_rate": "9.2131e-05",
+ "loss": 1.1631,
+ "slid_loss": 1.1449,
+ "step": 240,
+ "time": 13.79
+ },
+ {
+ "epoch": 0.23,
+ "learning_rate": "9.2514e-05",
+ "loss": 1.1786,
+ "slid_loss": 1.1435,
+ "step": 241,
+ "time": 13.67
+ },
+ {
+ "epoch": 0.23,
+ "learning_rate": "9.2898e-05",
+ "loss": 1.1789,
+ "slid_loss": 1.1429,
+ "step": 242,
+ "time": 13.09
+ },
+ {
+ "epoch": 0.23,
+ "learning_rate": "9.3282e-05",
+ "loss": 1.1783,
+ "slid_loss": 1.1433,
+ "step": 243,
+ "time": 13.19
+ },
+ {
+ "epoch": 0.23,
+ "learning_rate": "9.3666e-05",
+ "loss": 1.1004,
+ "slid_loss": 1.1429,
+ "step": 244,
+ "time": 13.39
+ },
+ {
+ "epoch": 0.24,
+ "learning_rate": "9.4050e-05",
+ "loss": 1.1534,
+ "slid_loss": 1.1411,
+ "step": 245,
+ "time": 13.65
+ },
+ {
+ "epoch": 0.24,
+ "learning_rate": "9.4434e-05",
+ "loss": 1.1435,
+ "slid_loss": 1.1414,
+ "step": 246,
+ "time": 13.73
+ },
+ {
+ "epoch": 0.24,
+ "learning_rate": "9.4818e-05",
+ "loss": 0.9677,
+ "slid_loss": 1.1395,
+ "step": 247,
+ "time": 11.06
+ },
+ {
+ "epoch": 0.24,
+ "learning_rate": "9.5202e-05",
+ "loss": 1.0647,
+ "slid_loss": 1.1387,
+ "step": 248,
+ "time": 14.1
+ },
+ {
+ "epoch": 0.24,
+ "learning_rate": "9.5585e-05",
+ "loss": 1.2235,
+ "slid_loss": 1.1399,
+ "step": 249,
+ "time": 11.99
+ },
+ {
+ "epoch": 0.24,
+ "learning_rate": "9.5969e-05",
+ "loss": 1.1393,
+ "slid_loss": 1.1393,
+ "step": 250,
+ "time": 13.29
+ },
+ {
+ "epoch": 0.24,
+ "learning_rate": "9.6353e-05",
+ "loss": 1.0045,
+ "slid_loss": 1.138,
+ "step": 251,
+ "time": 13.85
+ },
+ {
+ "epoch": 0.24,
+ "learning_rate": "9.6737e-05",
+ "loss": 1.1903,
+ "slid_loss": 1.1381,
+ "step": 252,
+ "time": 13.62
+ },
+ {
+ "epoch": 0.24,
+ "learning_rate": "9.7121e-05",
+ "loss": 1.0955,
+ "slid_loss": 1.138,
+ "step": 253,
+ "time": 14.07
+ },
+ {
+ "epoch": 0.24,
+ "learning_rate": "9.7505e-05",
+ "loss": 1.1831,
+ "slid_loss": 1.1387,
+ "step": 254,
+ "time": 14.16
+ },
+ {
+ "epoch": 0.24,
+ "learning_rate": "9.7889e-05",
+ "loss": 1.1305,
+ "slid_loss": 1.139,
+ "step": 255,
+ "time": 11.9
+ },
+ {
+ "epoch": 0.25,
+ "learning_rate": "9.8273e-05",
+ "loss": 1.1394,
+ "slid_loss": 1.1399,
+ "step": 256,
+ "time": 11.29
+ },
+ {
+ "epoch": 0.25,
+ "learning_rate": "9.8656e-05",
+ "loss": 1.1808,
+ "slid_loss": 1.1403,
+ "step": 257,
+ "time": 13.65
+ },
+ {
+ "epoch": 0.25,
+ "learning_rate": "9.9040e-05",
+ "loss": 1.2763,
+ "slid_loss": 1.1414,
+ "step": 258,
+ "time": 12.85
+ },
+ {
+ "epoch": 0.25,
+ "learning_rate": "9.9424e-05",
+ "loss": 1.1336,
+ "slid_loss": 1.1414,
+ "step": 259,
+ "time": 12.79
+ },
+ {
+ "epoch": 0.25,
+ "learning_rate": "9.9808e-05",
+ "loss": 1.0957,
+ "slid_loss": 1.1402,
+ "step": 260,
+ "time": 13.59
+ },
+ {
+ "epoch": 0.25,
+ "learning_rate": "1.0019e-04",
+ "loss": 1.2769,
+ "slid_loss": 1.1429,
+ "step": 261,
+ "time": 11.94
+ },
+ {
+ "epoch": 0.25,
+ "learning_rate": "1.0058e-04",
+ "loss": 1.1852,
+ "slid_loss": 1.1428,
+ "step": 262,
+ "time": 12.92
+ },
+ {
+ "epoch": 0.25,
+ "learning_rate": "1.0096e-04",
+ "loss": 1.1485,
+ "slid_loss": 1.1429,
+ "step": 263,
+ "time": 11.72
+ },
+ {
+ "epoch": 0.25,
+ "learning_rate": "1.0134e-04",
+ "loss": 1.1632,
+ "slid_loss": 1.1424,
+ "step": 264,
+ "time": 12.8
+ },
+ {
+ "epoch": 0.25,
+ "learning_rate": "1.0173e-04",
+ "loss": 1.0556,
+ "slid_loss": 1.1407,
+ "step": 265,
+ "time": 13.66
+ },
+ {
+ "epoch": 0.26,
+ "learning_rate": "1.0211e-04",
+ "loss": 1.0841,
+ "slid_loss": 1.1401,
+ "step": 266,
+ "time": 12.47
+ },
+ {
+ "epoch": 0.26,
+ "learning_rate": "1.0250e-04",
+ "loss": 1.1209,
+ "slid_loss": 1.1404,
+ "step": 267,
+ "time": 12.88
+ },
+ {
+ "epoch": 0.26,
+ "learning_rate": "1.0288e-04",
+ "loss": 1.0899,
+ "slid_loss": 1.1399,
+ "step": 268,
+ "time": 14.07
+ },
+ {
+ "epoch": 0.26,
+ "learning_rate": "1.0326e-04",
+ "loss": 1.0307,
+ "slid_loss": 1.1389,
+ "step": 269,
+ "time": 13.09
+ },
+ {
+ "epoch": 0.26,
+ "learning_rate": "1.0365e-04",
+ "loss": 1.0207,
+ "slid_loss": 1.1368,
+ "step": 270,
+ "time": 12.89
+ },
+ {
+ "epoch": 0.26,
+ "learning_rate": "1.0403e-04",
+ "loss": 1.0853,
+ "slid_loss": 1.1366,
+ "step": 271,
+ "time": 11.4
+ },
+ {
+ "epoch": 0.26,
+ "learning_rate": "1.0441e-04",
+ "loss": 1.1743,
+ "slid_loss": 1.1368,
+ "step": 272,
+ "time": 11.56
+ },
+ {
+ "epoch": 0.26,
+ "learning_rate": "1.0480e-04",
+ "loss": 1.0893,
+ "slid_loss": 1.1367,
+ "step": 273,
+ "time": 13.4
+ },
+ {
+ "epoch": 0.26,
+ "learning_rate": "1.0518e-04",
+ "loss": 1.0858,
+ "slid_loss": 1.1365,
+ "step": 274,
+ "time": 12.9
+ },
+ {
+ "epoch": 0.26,
+ "learning_rate": "1.0557e-04",
+ "loss": 1.1882,
+ "slid_loss": 1.1362,
+ "step": 275,
+ "time": 11.78
+ },
+ {
+ "epoch": 0.27,
+ "learning_rate": "1.0595e-04",
+ "loss": 1.1338,
+ "slid_loss": 1.1363,
+ "step": 276,
+ "time": 13.03
+ },
+ {
+ "epoch": 0.27,
+ "learning_rate": "1.0633e-04",
+ "loss": 1.0547,
+ "slid_loss": 1.1365,
+ "step": 277,
+ "time": 13.28
+ },
+ {
+ "epoch": 0.27,
+ "learning_rate": "1.0672e-04",
+ "loss": 1.0742,
+ "slid_loss": 1.1369,
+ "step": 278,
+ "time": 12.05
+ },
+ {
+ "epoch": 0.27,
+ "learning_rate": "1.0710e-04",
+ "loss": 1.0833,
+ "slid_loss": 1.1371,
+ "step": 279,
+ "time": 11.89
+ },
+ {
+ "epoch": 0.27,
+ "learning_rate": "1.0749e-04",
+ "loss": 1.0823,
+ "slid_loss": 1.136,
+ "step": 280,
+ "time": 13.4
+ },
+ {
+ "epoch": 0.27,
+ "learning_rate": "1.0787e-04",
+ "loss": 1.0255,
+ "slid_loss": 1.1347,
+ "step": 281,
+ "time": 11.85
+ },
+ {
+ "epoch": 0.27,
+ "learning_rate": "1.0825e-04",
+ "loss": 1.1314,
+ "slid_loss": 1.1358,
+ "step": 282,
+ "time": 12.19
+ },
+ {
+ "epoch": 0.27,
+ "learning_rate": "1.0864e-04",
+ "loss": 1.0214,
+ "slid_loss": 1.1349,
+ "step": 283,
+ "time": 12.35
+ },
+ {
+ "epoch": 0.27,
+ "learning_rate": "1.0902e-04",
+ "loss": 1.092,
+ "slid_loss": 1.1339,
+ "step": 284,
+ "time": 13.41
+ },
+ {
+ "epoch": 0.27,
+ "learning_rate": "1.0940e-04",
+ "loss": 1.1086,
+ "slid_loss": 1.1335,
+ "step": 285,
+ "time": 12.97
+ },
+ {
+ "epoch": 0.27,
+ "learning_rate": "1.0979e-04",
+ "loss": 1.1321,
+ "slid_loss": 1.1332,
+ "step": 286,
+ "time": 12.21
+ },
+ {
+ "epoch": 0.28,
+ "learning_rate": "1.1017e-04",
+ "loss": 1.1786,
+ "slid_loss": 1.1347,
+ "step": 287,
+ "time": 12.86
+ },
+ {
+ "epoch": 0.28,
+ "learning_rate": "1.1056e-04",
+ "loss": 1.0995,
+ "slid_loss": 1.1347,
+ "step": 288,
+ "time": 12.22
+ },
+ {
+ "epoch": 0.28,
+ "learning_rate": "1.1094e-04",
+ "loss": 1.1757,
+ "slid_loss": 1.1348,
+ "step": 289,
+ "time": 13.0
+ },
+ {
+ "epoch": 0.28,
+ "learning_rate": "1.1132e-04",
+ "loss": 1.1149,
+ "slid_loss": 1.1341,
+ "step": 290,
+ "time": 11.96
+ },
+ {
+ "epoch": 0.28,
+ "learning_rate": "1.1171e-04",
+ "loss": 1.0929,
+ "slid_loss": 1.1338,
+ "step": 291,
+ "time": 13.93
+ },
+ {
+ "epoch": 0.28,
+ "learning_rate": "1.1209e-04",
+ "loss": 1.2009,
+ "slid_loss": 1.1342,
+ "step": 292,
+ "time": 10.88
+ },
+ {
+ "epoch": 0.28,
+ "learning_rate": "1.1248e-04",
+ "loss": 1.2116,
+ "slid_loss": 1.1347,
+ "step": 293,
+ "time": 13.32
+ },
+ {
+ "epoch": 0.28,
+ "learning_rate": "1.1286e-04",
+ "loss": 1.1023,
+ "slid_loss": 1.1344,
+ "step": 294,
+ "time": 13.38
+ },
+ {
+ "epoch": 0.28,
+ "learning_rate": "1.1324e-04",
+ "loss": 1.0516,
+ "slid_loss": 1.1328,
+ "step": 295,
+ "time": 12.27
+ },
+ {
+ "epoch": 0.28,
+ "learning_rate": "1.1363e-04",
+ "loss": 1.0346,
+ "slid_loss": 1.1313,
+ "step": 296,
+ "time": 14.2
+ },
+ {
+ "epoch": 0.29,
+ "learning_rate": "1.1401e-04",
+ "loss": 1.0819,
+ "slid_loss": 1.1316,
+ "step": 297,
+ "time": 13.89
+ },
+ {
+ "epoch": 0.29,
+ "learning_rate": "1.1440e-04",
+ "loss": 1.0712,
+ "slid_loss": 1.1308,
+ "step": 298,
+ "time": 13.5
+ },
+ {
+ "epoch": 0.29,
+ "learning_rate": "1.1478e-04",
+ "loss": 1.1035,
+ "slid_loss": 1.1303,
+ "step": 299,
+ "time": 13.43
+ },
+ {
+ "epoch": 0.29,
+ "learning_rate": "1.1516e-04",
+ "loss": 1.1898,
+ "slid_loss": 1.1308,
+ "step": 300,
+ "time": 11.83
+ },
+ {
+ "epoch": 0.29,
+ "learning_rate": "1.1555e-04",
+ "loss": 1.0281,
+ "slid_loss": 1.1307,
+ "step": 301,
+ "time": 13.84
+ },
+ {
+ "epoch": 0.29,
+ "learning_rate": "1.1593e-04",
+ "loss": 0.9542,
+ "slid_loss": 1.1299,
+ "step": 302,
+ "time": 13.42
+ },
+ {
+ "epoch": 0.29,
+ "learning_rate": "1.1631e-04",
+ "loss": 1.1331,
+ "slid_loss": 1.1306,
+ "step": 303,
+ "time": 12.87
+ },
+ {
+ "epoch": 0.29,
+ "learning_rate": "1.1670e-04",
+ "loss": 1.105,
+ "slid_loss": 1.1308,
+ "step": 304,
+ "time": 14.05
+ },
+ {
+ "epoch": 0.29,
+ "learning_rate": "1.1708e-04",
+ "loss": 1.1077,
+ "slid_loss": 1.1298,
+ "step": 305,
+ "time": 13.58
+ },
+ {
+ "epoch": 0.29,
+ "learning_rate": "1.1747e-04",
+ "loss": 1.0667,
+ "slid_loss": 1.1288,
+ "step": 306,
+ "time": 13.03
+ },
+ {
+ "epoch": 0.29,
+ "learning_rate": "1.1785e-04",
+ "loss": 1.1468,
+ "slid_loss": 1.128,
+ "step": 307,
+ "time": 12.5
+ },
+ {
+ "epoch": 0.3,
+ "learning_rate": "1.1823e-04",
+ "loss": 1.1221,
+ "slid_loss": 1.1285,
+ "step": 308,
+ "time": 11.72
+ },
+ {
+ "epoch": 0.3,
+ "learning_rate": "1.1862e-04",
+ "loss": 1.1155,
+ "slid_loss": 1.127,
+ "step": 309,
+ "time": 13.25
+ },
+ {
+ "epoch": 0.3,
+ "learning_rate": "1.1900e-04",
+ "loss": 1.1023,
+ "slid_loss": 1.1257,
+ "step": 310,
+ "time": 13.83
+ },
+ {
+ "epoch": 0.3,
+ "learning_rate": "1.1939e-04",
+ "loss": 1.0897,
+ "slid_loss": 1.1248,
+ "step": 311,
+ "time": 14.47
+ },
+ {
+ "epoch": 0.3,
+ "learning_rate": "1.1977e-04",
+ "loss": 1.2274,
+ "slid_loss": 1.1252,
+ "step": 312,
+ "time": 12.87
+ },
+ {
+ "epoch": 0.3,
+ "learning_rate": "1.2015e-04",
+ "loss": 1.0339,
+ "slid_loss": 1.1248,
+ "step": 313,
+ "time": 14.41
+ },
+ {
+ "epoch": 0.3,
+ "learning_rate": "1.2054e-04",
+ "loss": 1.0379,
+ "slid_loss": 1.1236,
+ "step": 314,
+ "time": 13.4
+ },
+ {
+ "epoch": 0.3,
+ "learning_rate": "1.2092e-04",
+ "loss": 1.1425,
+ "slid_loss": 1.1235,
+ "step": 315,
+ "time": 13.36
+ },
+ {
+ "epoch": 0.3,
+ "learning_rate": "1.2131e-04",
+ "loss": 1.1424,
+ "slid_loss": 1.1234,
+ "step": 316,
+ "time": 12.76
+ },
+ {
+ "epoch": 0.3,
+ "learning_rate": "1.2169e-04",
+ "loss": 1.0492,
+ "slid_loss": 1.1218,
+ "step": 317,
+ "time": 13.7
+ },
+ {
+ "epoch": 0.31,
+ "learning_rate": "1.2207e-04",
+ "loss": 1.0339,
+ "slid_loss": 1.1203,
+ "step": 318,
+ "time": 13.21
+ },
+ {
+ "epoch": 0.31,
+ "learning_rate": "1.2246e-04",
+ "loss": 1.0246,
+ "slid_loss": 1.1191,
+ "step": 319,
+ "time": 10.85
+ },
+ {
+ "epoch": 0.31,
+ "learning_rate": "1.2284e-04",
+ "loss": 1.1394,
+ "slid_loss": 1.1196,
+ "step": 320,
+ "time": 12.82
+ },
+ {
+ "epoch": 0.31,
+ "learning_rate": "1.2322e-04",
+ "loss": 1.0116,
+ "slid_loss": 1.1188,
+ "step": 321,
+ "time": 12.63
+ },
+ {
+ "epoch": 0.31,
+ "learning_rate": "1.2361e-04",
+ "loss": 1.0519,
+ "slid_loss": 1.1178,
+ "step": 322,
+ "time": 13.44
+ },
+ {
+ "epoch": 0.31,
+ "learning_rate": "1.2399e-04",
+ "loss": 1.1848,
+ "slid_loss": 1.117,
+ "step": 323,
+ "time": 13.94
+ },
+ {
+ "epoch": 0.31,
+ "learning_rate": "1.2438e-04",
+ "loss": 1.1344,
+ "slid_loss": 1.1185,
+ "step": 324,
+ "time": 13.44
+ },
+ {
+ "epoch": 0.31,
+ "learning_rate": "1.2476e-04",
+ "loss": 1.1106,
+ "slid_loss": 1.1171,
+ "step": 325,
+ "time": 13.4
+ },
+ {
+ "epoch": 0.31,
+ "learning_rate": "1.2514e-04",
+ "loss": 1.106,
+ "slid_loss": 1.1177,
+ "step": 326,
+ "time": 15.13
+ },
+ {
+ "epoch": 0.31,
+ "learning_rate": "1.2553e-04",
+ "loss": 1.0726,
+ "slid_loss": 1.1172,
+ "step": 327,
+ "time": 13.35
+ },
+ {
+ "epoch": 0.32,
+ "learning_rate": "1.2591e-04",
+ "loss": 1.0443,
+ "slid_loss": 1.1165,
+ "step": 328,
+ "time": 13.14
+ },
+ {
+ "epoch": 0.32,
+ "learning_rate": "1.2630e-04",
+ "loss": 1.2201,
+ "slid_loss": 1.1175,
+ "step": 329,
+ "time": 10.85
+ },
+ {
+ "epoch": 0.32,
+ "learning_rate": "1.2668e-04",
+ "loss": 0.9423,
+ "slid_loss": 1.1153,
+ "step": 330,
+ "time": 13.64
+ },
+ {
+ "epoch": 0.32,
+ "learning_rate": "1.2706e-04",
+ "loss": 1.124,
+ "slid_loss": 1.1154,
+ "step": 331,
+ "time": 13.0
+ },
+ {
+ "epoch": 0.32,
+ "learning_rate": "1.2745e-04",
+ "loss": 1.1948,
+ "slid_loss": 1.1161,
+ "step": 332,
+ "time": 14.18
+ },
+ {
+ "epoch": 0.32,
+ "learning_rate": "1.2783e-04",
+ "loss": 1.0845,
+ "slid_loss": 1.115,
+ "step": 333,
+ "time": 13.71
+ },
+ {
+ "epoch": 0.32,
+ "learning_rate": "1.2821e-04",
+ "loss": 1.0932,
+ "slid_loss": 1.1144,
+ "step": 334,
+ "time": 13.18
+ },
+ {
+ "epoch": 0.32,
+ "learning_rate": "1.2860e-04",
+ "loss": 1.1208,
+ "slid_loss": 1.1136,
+ "step": 335,
+ "time": 12.01
+ },
+ {
+ "epoch": 0.32,
+ "learning_rate": "1.2898e-04",
+ "loss": 1.2009,
+ "slid_loss": 1.1141,
+ "step": 336,
+ "time": 11.5
+ },
+ {
+ "epoch": 0.32,
+ "learning_rate": "1.2937e-04",
+ "loss": 1.0812,
+ "slid_loss": 1.113,
+ "step": 337,
+ "time": 13.75
+ },
+ {
+ "epoch": 0.32,
+ "learning_rate": "1.2975e-04",
+ "loss": 1.0424,
+ "slid_loss": 1.1114,
+ "step": 338,
+ "time": 12.27
+ },
+ {
+ "epoch": 0.33,
+ "learning_rate": "1.3013e-04",
+ "loss": 1.1397,
+ "slid_loss": 1.1113,
+ "step": 339,
+ "time": 13.57
+ },
+ {
+ "epoch": 0.33,
+ "learning_rate": "1.3052e-04",
+ "loss": 1.1018,
+ "slid_loss": 1.1107,
+ "step": 340,
+ "time": 14.13
+ },
+ {
+ "epoch": 0.33,
+ "learning_rate": "1.3090e-04",
+ "loss": 1.0822,
+ "slid_loss": 1.1097,
+ "step": 341,
+ "time": 14.58
+ },
+ {
+ "epoch": 0.33,
+ "learning_rate": "1.3129e-04",
+ "loss": 1.0471,
+ "slid_loss": 1.1084,
+ "step": 342,
+ "time": 13.32
+ },
+ {
+ "epoch": 0.33,
+ "learning_rate": "1.3167e-04",
+ "loss": 1.1075,
+ "slid_loss": 1.1077,
+ "step": 343,
+ "time": 14.26
+ },
+ {
+ "epoch": 0.33,
+ "learning_rate": "1.3205e-04",
+ "loss": 1.1103,
+ "slid_loss": 1.1078,
+ "step": 344,
+ "time": 10.91
+ },
+ {
+ "epoch": 0.33,
+ "learning_rate": "1.3244e-04",
+ "loss": 1.0956,
+ "slid_loss": 1.1072,
+ "step": 345,
+ "time": 13.25
+ },
+ {
+ "epoch": 0.33,
+ "learning_rate": "1.3282e-04",
+ "loss": 0.9834,
+ "slid_loss": 1.1056,
+ "step": 346,
+ "time": 11.59
+ },
+ {
+ "epoch": 0.33,
+ "learning_rate": "1.3321e-04",
+ "loss": 1.0889,
+ "slid_loss": 1.1068,
+ "step": 347,
+ "time": 12.24
+ },
+ {
+ "epoch": 0.33,
+ "learning_rate": "1.3359e-04",
+ "loss": 1.0538,
+ "slid_loss": 1.1067,
+ "step": 348,
+ "time": 12.18
+ },
+ {
+ "epoch": 0.34,
+ "learning_rate": "1.3397e-04",
+ "loss": 1.0294,
+ "slid_loss": 1.1048,
+ "step": 349,
+ "time": 14.03
+ },
+ {
+ "epoch": 0.34,
+ "learning_rate": "1.3436e-04",
+ "loss": 1.0403,
+ "slid_loss": 1.1038,
+ "step": 350,
+ "time": 13.37
+ },
+ {
+ "epoch": 0.34,
+ "learning_rate": "1.3474e-04",
+ "loss": 1.2006,
+ "slid_loss": 1.1058,
+ "step": 351,
+ "time": 13.67
+ },
+ {
+ "epoch": 0.34,
+ "learning_rate": "1.3512e-04",
+ "loss": 1.1609,
+ "slid_loss": 1.1055,
+ "step": 352,
+ "time": 13.38
+ },
+ {
+ "epoch": 0.34,
+ "learning_rate": "1.3551e-04",
+ "loss": 1.1069,
+ "slid_loss": 1.1056,
+ "step": 353,
+ "time": 13.69
+ },
+ {
+ "epoch": 0.34,
+ "learning_rate": "1.3589e-04",
+ "loss": 0.9822,
+ "slid_loss": 1.1036,
+ "step": 354,
+ "time": 14.02
+ },
+ {
+ "epoch": 0.34,
+ "learning_rate": "1.3628e-04",
+ "loss": 1.013,
+ "slid_loss": 1.1024,
+ "step": 355,
+ "time": 14.23
+ },
+ {
+ "epoch": 0.34,
+ "learning_rate": "1.3666e-04",
+ "loss": 1.1721,
+ "slid_loss": 1.1027,
+ "step": 356,
+ "time": 13.72
+ },
+ {
+ "epoch": 0.34,
+ "learning_rate": "1.3704e-04",
+ "loss": 1.0672,
+ "slid_loss": 1.1016,
+ "step": 357,
+ "time": 13.12
+ },
+ {
+ "epoch": 0.34,
+ "learning_rate": "1.3743e-04",
+ "loss": 1.0824,
+ "slid_loss": 1.0996,
+ "step": 358,
+ "time": 13.16
+ },
+ {
+ "epoch": 0.34,
+ "learning_rate": "1.3781e-04",
+ "loss": 1.1155,
+ "slid_loss": 1.0995,
+ "step": 359,
+ "time": 12.04
+ },
+ {
+ "epoch": 0.35,
+ "learning_rate": "1.3820e-04",
+ "loss": 1.0015,
+ "slid_loss": 1.0985,
+ "step": 360,
+ "time": 12.14
+ },
+ {
+ "epoch": 0.35,
+ "learning_rate": "1.3858e-04",
+ "loss": 1.0656,
+ "slid_loss": 1.0964,
+ "step": 361,
+ "time": 12.31
+ },
+ {
+ "epoch": 0.35,
+ "learning_rate": "1.3896e-04",
+ "loss": 1.2371,
+ "slid_loss": 1.0969,
+ "step": 362,
+ "time": 13.2
+ },
+ {
+ "epoch": 0.35,
+ "learning_rate": "1.3935e-04",
+ "loss": 1.138,
+ "slid_loss": 1.0968,
+ "step": 363,
+ "time": 13.75
+ },
+ {
+ "epoch": 0.35,
+ "learning_rate": "1.3973e-04",
+ "loss": 1.1079,
+ "slid_loss": 1.0963,
+ "step": 364,
+ "time": 11.31
+ },
+ {
+ "epoch": 0.35,
+ "learning_rate": "1.4012e-04",
+ "loss": 1.0055,
+ "slid_loss": 1.0958,
+ "step": 365,
+ "time": 11.94
+ },
+ {
+ "epoch": 0.35,
+ "learning_rate": "1.4050e-04",
+ "loss": 1.1014,
+ "slid_loss": 1.0959,
+ "step": 366,
+ "time": 12.24
+ },
+ {
+ "epoch": 0.35,
+ "learning_rate": "1.4088e-04",
+ "loss": 1.1014,
+ "slid_loss": 1.0957,
+ "step": 367,
+ "time": 13.54
+ },
+ {
+ "epoch": 0.35,
+ "learning_rate": "1.4127e-04",
+ "loss": 1.0626,
+ "slid_loss": 1.0955,
+ "step": 368,
+ "time": 13.78
+ },
+ {
+ "epoch": 0.35,
+ "learning_rate": "1.4165e-04",
+ "loss": 1.0423,
+ "slid_loss": 1.0956,
+ "step": 369,
+ "time": 12.82
+ },
+ {
+ "epoch": 0.36,
+ "learning_rate": "1.4203e-04",
+ "loss": 1.2084,
+ "slid_loss": 1.0975,
+ "step": 370,
+ "time": 12.17
+ },
+ {
+ "epoch": 0.36,
+ "learning_rate": "1.4242e-04",
+ "loss": 1.1612,
+ "slid_loss": 1.0982,
+ "step": 371,
+ "time": 11.88
+ },
+ {
+ "epoch": 0.36,
+ "learning_rate": "1.4280e-04",
+ "loss": 1.0613,
+ "slid_loss": 1.0971,
+ "step": 372,
+ "time": 15.27
+ },
+ {
+ "epoch": 0.36,
+ "learning_rate": "1.4319e-04",
+ "loss": 1.0918,
+ "slid_loss": 1.0971,
+ "step": 373,
+ "time": 13.38
+ },
+ {
+ "epoch": 0.36,
+ "learning_rate": "1.4357e-04",
+ "loss": 0.9901,
+ "slid_loss": 1.0962,
+ "step": 374,
+ "time": 14.06
+ },
+ {
+ "epoch": 0.36,
+ "learning_rate": "1.4395e-04",
+ "loss": 1.1319,
+ "slid_loss": 1.0956,
+ "step": 375,
+ "time": 12.56
+ },
+ {
+ "epoch": 0.36,
+ "learning_rate": "1.4434e-04",
+ "loss": 1.0916,
+ "slid_loss": 1.0952,
+ "step": 376,
+ "time": 13.19
+ },
+ {
+ "epoch": 0.36,
+ "learning_rate": "1.4472e-04",
+ "loss": 1.0855,
+ "slid_loss": 1.0955,
+ "step": 377,
+ "time": 13.2
+ },
+ {
+ "epoch": 0.36,
+ "learning_rate": "1.4511e-04",
+ "loss": 1.2371,
+ "slid_loss": 1.0971,
+ "step": 378,
+ "time": 13.73
+ },
+ {
+ "epoch": 0.36,
+ "learning_rate": "1.4549e-04",
+ "loss": 1.044,
+ "slid_loss": 1.0967,
+ "step": 379,
+ "time": 13.43
+ },
+ {
+ "epoch": 0.37,
+ "learning_rate": "1.4587e-04",
+ "loss": 1.1135,
+ "slid_loss": 1.097,
+ "step": 380,
+ "time": 12.89
+ },
+ {
+ "epoch": 0.37,
+ "learning_rate": "1.4626e-04",
+ "loss": 1.0429,
+ "slid_loss": 1.0972,
+ "step": 381,
+ "time": 11.21
+ },
+ {
+ "epoch": 0.37,
+ "learning_rate": "1.4664e-04",
+ "loss": 1.046,
+ "slid_loss": 1.0964,
+ "step": 382,
+ "time": 11.45
+ },
+ {
+ "epoch": 0.37,
+ "learning_rate": "1.4702e-04",
+ "loss": 1.1237,
+ "slid_loss": 1.0974,
+ "step": 383,
+ "time": 11.13
+ },
+ {
+ "epoch": 0.37,
+ "learning_rate": "1.4741e-04",
+ "loss": 1.0212,
+ "slid_loss": 1.0967,
+ "step": 384,
+ "time": 12.17
+ },
+ {
+ "epoch": 0.37,
+ "learning_rate": "1.4779e-04",
+ "loss": 0.9437,
+ "slid_loss": 1.095,
+ "step": 385,
+ "time": 13.56
+ },
+ {
+ "epoch": 0.37,
+ "learning_rate": "1.4818e-04",
+ "loss": 1.0723,
+ "slid_loss": 1.0944,
+ "step": 386,
+ "time": 14.07
+ },
+ {
+ "epoch": 0.37,
+ "learning_rate": "1.4856e-04",
+ "loss": 1.0119,
+ "slid_loss": 1.0928,
+ "step": 387,
+ "time": 13.98
+ },
+ {
+ "epoch": 0.37,
+ "learning_rate": "1.4894e-04",
+ "loss": 1.1427,
+ "slid_loss": 1.0932,
+ "step": 388,
+ "time": 14.01
+ },
+ {
+ "epoch": 0.37,
+ "learning_rate": "1.4933e-04",
+ "loss": 1.0418,
+ "slid_loss": 1.0918,
+ "step": 389,
+ "time": 13.75
+ },
+ {
+ "epoch": 0.37,
+ "learning_rate": "1.4971e-04",
+ "loss": 1.0642,
+ "slid_loss": 1.0913,
+ "step": 390,
+ "time": 13.87
+ },
+ {
+ "epoch": 0.38,
+ "learning_rate": "1.5010e-04",
+ "loss": 1.0042,
+ "slid_loss": 1.0905,
+ "step": 391,
+ "time": 13.65
+ },
+ {
+ "epoch": 0.38,
+ "learning_rate": "1.5048e-04",
+ "loss": 0.9899,
+ "slid_loss": 1.0883,
+ "step": 392,
+ "time": 14.41
+ },
+ {
+ "epoch": 0.38,
+ "learning_rate": "1.5086e-04",
+ "loss": 1.0094,
+ "slid_loss": 1.0863,
+ "step": 393,
+ "time": 13.43
+ },
+ {
+ "epoch": 0.38,
+ "learning_rate": "1.5125e-04",
+ "loss": 1.178,
+ "slid_loss": 1.0871,
+ "step": 394,
+ "time": 13.18
+ },
+ {
+ "epoch": 0.38,
+ "learning_rate": "1.5163e-04",
+ "loss": 1.0861,
+ "slid_loss": 1.0874,
+ "step": 395,
+ "time": 13.12
+ },
+ {
+ "epoch": 0.38,
+ "learning_rate": "1.5202e-04",
+ "loss": 0.9624,
+ "slid_loss": 1.0867,
+ "step": 396,
+ "time": 12.6
+ },
+ {
+ "epoch": 0.38,
+ "learning_rate": "1.5240e-04",
+ "loss": 1.0635,
+ "slid_loss": 1.0865,
+ "step": 397,
+ "time": 13.9
+ },
+ {
+ "epoch": 0.38,
+ "learning_rate": "1.5278e-04",
+ "loss": 1.0372,
+ "slid_loss": 1.0862,
+ "step": 398,
+ "time": 10.54
+ },
+ {
+ "epoch": 0.38,
+ "learning_rate": "1.5317e-04",
+ "loss": 1.0002,
+ "slid_loss": 1.0851,
+ "step": 399,
+ "time": 13.66
+ },
+ {
+ "epoch": 0.38,
+ "learning_rate": "1.5355e-04",
+ "loss": 1.063,
+ "slid_loss": 1.0839,
+ "step": 400,
+ "time": 12.87
+ },
+ {
+ "epoch": 0.39,
+ "learning_rate": "1.5393e-04",
+ "loss": 1.1754,
+ "slid_loss": 1.0853,
+ "step": 401,
+ "time": 12.83
+ },
+ {
+ "epoch": 0.39,
+ "learning_rate": "1.5432e-04",
+ "loss": 1.0598,
+ "slid_loss": 1.0864,
+ "step": 402,
+ "time": 13.15
+ },
+ {
+ "epoch": 0.39,
+ "learning_rate": "1.5470e-04",
+ "loss": 1.0806,
+ "slid_loss": 1.0859,
+ "step": 403,
+ "time": 13.68
+ },
+ {
+ "epoch": 0.39,
+ "learning_rate": "1.5509e-04",
+ "loss": 1.1851,
+ "slid_loss": 1.0867,
+ "step": 404,
+ "time": 14.14
+ },
+ {
+ "epoch": 0.39,
+ "learning_rate": "1.5547e-04",
+ "loss": 1.1202,
+ "slid_loss": 1.0868,
+ "step": 405,
+ "time": 13.46
+ },
+ {
+ "epoch": 0.39,
+ "learning_rate": "1.5585e-04",
+ "loss": 1.1635,
+ "slid_loss": 1.0878,
+ "step": 406,
+ "time": 12.14
+ },
+ {
+ "epoch": 0.39,
+ "learning_rate": "1.5624e-04",
+ "loss": 1.1358,
+ "slid_loss": 1.0877,
+ "step": 407,
+ "time": 13.34
+ },
+ {
+ "epoch": 0.39,
+ "learning_rate": "1.5662e-04",
+ "loss": 1.1069,
+ "slid_loss": 1.0875,
+ "step": 408,
+ "time": 11.44
+ },
+ {
+ "epoch": 0.39,
+ "learning_rate": "1.5701e-04",
+ "loss": 1.1895,
+ "slid_loss": 1.0883,
+ "step": 409,
+ "time": 14.11
+ },
+ {
+ "epoch": 0.39,
+ "learning_rate": "1.5739e-04",
+ "loss": 0.9738,
+ "slid_loss": 1.087,
+ "step": 410,
+ "time": 13.65
+ },
+ {
+ "epoch": 0.39,
+ "learning_rate": "1.5777e-04",
+ "loss": 1.1221,
+ "slid_loss": 1.0873,
+ "step": 411,
+ "time": 13.2
+ },
+ {
+ "epoch": 0.4,
+ "learning_rate": "1.5816e-04",
+ "loss": 1.1668,
+ "slid_loss": 1.0867,
+ "step": 412,
+ "time": 13.82
+ },
+ {
+ "epoch": 0.4,
+ "learning_rate": "1.5854e-04",
+ "loss": 1.1516,
+ "slid_loss": 1.0879,
+ "step": 413,
+ "time": 11.91
+ },
+ {
+ "epoch": 0.4,
+ "learning_rate": "1.5893e-04",
+ "loss": 1.0854,
+ "slid_loss": 1.0883,
+ "step": 414,
+ "time": 13.41
+ },
+ {
+ "epoch": 0.4,
+ "learning_rate": "1.5931e-04",
+ "loss": 1.0152,
+ "slid_loss": 1.0871,
+ "step": 415,
+ "time": 13.07
+ },
+ {
+ "epoch": 0.4,
+ "learning_rate": "1.5969e-04",
+ "loss": 1.1072,
+ "slid_loss": 1.0867,
+ "step": 416,
+ "time": 12.04
+ },
+ {
+ "epoch": 0.4,
+ "learning_rate": "1.6008e-04",
+ "loss": 1.035,
+ "slid_loss": 1.0866,
+ "step": 417,
+ "time": 13.89
+ },
+ {
+ "epoch": 0.4,
+ "learning_rate": "1.6046e-04",
+ "loss": 1.1327,
+ "slid_loss": 1.0876,
+ "step": 418,
+ "time": 12.61
+ },
+ {
+ "epoch": 0.4,
+ "learning_rate": "1.6084e-04",
+ "loss": 0.9402,
+ "slid_loss": 1.0867,
+ "step": 419,
+ "time": 10.8
+ },
+ {
+ "epoch": 0.4,
+ "learning_rate": "1.6123e-04",
+ "loss": 1.1575,
+ "slid_loss": 1.0869,
+ "step": 420,
+ "time": 13.08
+ },
+ {
+ "epoch": 0.4,
+ "learning_rate": "1.6161e-04",
+ "loss": 1.0064,
+ "slid_loss": 1.0868,
+ "step": 421,
+ "time": 12.68
+ },
+ {
+ "epoch": 0.41,
+ "learning_rate": "1.6200e-04",
+ "loss": 1.1238,
+ "slid_loss": 1.0876,
+ "step": 422,
+ "time": 13.5
+ },
+ {
+ "epoch": 0.41,
+ "learning_rate": "1.6238e-04",
+ "loss": 1.0533,
+ "slid_loss": 1.0862,
+ "step": 423,
+ "time": 12.94
+ },
+ {
+ "epoch": 0.41,
+ "learning_rate": "1.6276e-04",
+ "loss": 1.1043,
+ "slid_loss": 1.0859,
+ "step": 424,
+ "time": 11.9
+ },
+ {
+ "epoch": 0.41,
+ "learning_rate": "1.6315e-04",
+ "loss": 1.0688,
+ "slid_loss": 1.0855,
+ "step": 425,
+ "time": 12.98
+ },
+ {
+ "epoch": 0.41,
+ "learning_rate": "1.6353e-04",
+ "loss": 1.0692,
+ "slid_loss": 1.0852,
+ "step": 426,
+ "time": 13.77
+ },
+ {
+ "epoch": 0.41,
+ "learning_rate": "1.6392e-04",
+ "loss": 1.0817,
+ "slid_loss": 1.0853,
+ "step": 427,
+ "time": 12.28
+ },
+ {
+ "epoch": 0.41,
+ "learning_rate": "1.6430e-04",
+ "loss": 0.9885,
+ "slid_loss": 1.0847,
+ "step": 428,
+ "time": 13.05
+ },
+ {
+ "epoch": 0.41,
+ "learning_rate": "1.6468e-04",
+ "loss": 1.0434,
+ "slid_loss": 1.0829,
+ "step": 429,
+ "time": 12.83
+ },
+ {
+ "epoch": 0.41,
+ "learning_rate": "1.6507e-04",
+ "loss": 1.1381,
+ "slid_loss": 1.0849,
+ "step": 430,
+ "time": 13.82
+ },
+ {
+ "epoch": 0.41,
+ "learning_rate": "1.6545e-04",
+ "loss": 0.9673,
+ "slid_loss": 1.0833,
+ "step": 431,
+ "time": 13.65
+ },
+ {
+ "epoch": 0.41,
+ "learning_rate": "1.6583e-04",
+ "loss": 1.1323,
+ "slid_loss": 1.0827,
+ "step": 432,
+ "time": 13.27
+ },
+ {
+ "epoch": 0.42,
+ "learning_rate": "1.6622e-04",
+ "loss": 1.0484,
+ "slid_loss": 1.0823,
+ "step": 433,
+ "time": 14.47
+ },
+ {
+ "epoch": 0.42,
+ "learning_rate": "1.6660e-04",
+ "loss": 1.0162,
+ "slid_loss": 1.0816,
+ "step": 434,
+ "time": 10.84
+ },
+ {
+ "epoch": 0.42,
+ "learning_rate": "1.6699e-04",
+ "loss": 1.0345,
+ "slid_loss": 1.0807,
+ "step": 435,
+ "time": 11.46
+ },
+ {
+ "epoch": 0.42,
+ "learning_rate": "1.6737e-04",
+ "loss": 1.0991,
+ "slid_loss": 1.0797,
+ "step": 436,
+ "time": 12.81
+ },
+ {
+ "epoch": 0.42,
+ "learning_rate": "1.6775e-04",
+ "loss": 1.1863,
+ "slid_loss": 1.0807,
+ "step": 437,
+ "time": 13.83
+ },
+ {
+ "epoch": 0.42,
+ "learning_rate": "1.6814e-04",
+ "loss": 1.184,
+ "slid_loss": 1.0821,
+ "step": 438,
+ "time": 12.91
+ },
+ {
+ "epoch": 0.42,
+ "learning_rate": "1.6852e-04",
+ "loss": 0.9225,
+ "slid_loss": 1.08,
+ "step": 439,
+ "time": 13.16
+ },
+ {
+ "epoch": 0.42,
+ "learning_rate": "1.6891e-04",
+ "loss": 1.0758,
+ "slid_loss": 1.0797,
+ "step": 440,
+ "time": 13.3
+ },
+ {
+ "epoch": 0.42,
+ "learning_rate": "1.6929e-04",
+ "loss": 1.0419,
+ "slid_loss": 1.0793,
+ "step": 441,
+ "time": 13.85
+ },
+ {
+ "epoch": 0.42,
+ "learning_rate": "1.6967e-04",
+ "loss": 0.9558,
+ "slid_loss": 1.0784,
+ "step": 442,
+ "time": 12.44
+ },
+ {
+ "epoch": 0.43,
+ "learning_rate": "1.7006e-04",
+ "loss": 1.0148,
+ "slid_loss": 1.0775,
+ "step": 443,
+ "time": 12.38
+ },
+ {
+ "epoch": 0.43,
+ "learning_rate": "1.7044e-04",
+ "loss": 1.1866,
+ "slid_loss": 1.0782,
+ "step": 444,
+ "time": 11.37
+ },
+ {
+ "epoch": 0.43,
+ "learning_rate": "1.7083e-04",
+ "loss": 1.0033,
+ "slid_loss": 1.0773,
+ "step": 445,
+ "time": 13.22
+ },
+ {
+ "epoch": 0.43,
+ "learning_rate": "1.7121e-04",
+ "loss": 1.1825,
+ "slid_loss": 1.0793,
+ "step": 446,
+ "time": 11.81
+ },
+ {
+ "epoch": 0.43,
+ "learning_rate": "1.7159e-04",
+ "loss": 1.0948,
+ "slid_loss": 1.0794,
+ "step": 447,
+ "time": 11.52
+ },
+ {
+ "epoch": 0.43,
+ "learning_rate": "1.7198e-04",
+ "loss": 1.1319,
+ "slid_loss": 1.0801,
+ "step": 448,
+ "time": 13.96
+ },
+ {
+ "epoch": 0.43,
+ "learning_rate": "1.7236e-04",
+ "loss": 1.0548,
+ "slid_loss": 1.0804,
+ "step": 449,
+ "time": 13.66
+ },
+ {
+ "epoch": 0.43,
+ "learning_rate": "1.7274e-04",
+ "loss": 1.0546,
+ "slid_loss": 1.0805,
+ "step": 450,
+ "time": 11.64
+ },
+ {
+ "epoch": 0.43,
+ "learning_rate": "1.7313e-04",
+ "loss": 1.1605,
+ "slid_loss": 1.0801,
+ "step": 451,
+ "time": 13.41
+ },
+ {
+ "epoch": 0.43,
+ "learning_rate": "1.7351e-04",
+ "loss": 1.1163,
+ "slid_loss": 1.0797,
+ "step": 452,
+ "time": 12.7
+ },
+ {
+ "epoch": 0.44,
+ "learning_rate": "1.7390e-04",
+ "loss": 1.0139,
+ "slid_loss": 1.0788,
+ "step": 453,
+ "time": 12.74
+ },
+ {
+ "epoch": 0.44,
+ "learning_rate": "1.7428e-04",
+ "loss": 1.0758,
+ "slid_loss": 1.0797,
+ "step": 454,
+ "time": 13.4
+ },
+ {
+ "epoch": 0.44,
+ "learning_rate": "1.7466e-04",
+ "loss": 1.1292,
+ "slid_loss": 1.0809,
+ "step": 455,
+ "time": 13.01
+ },
+ {
+ "epoch": 0.44,
+ "learning_rate": "1.7505e-04",
+ "loss": 0.9871,
+ "slid_loss": 1.079,
+ "step": 456,
+ "time": 12.94
+ },
+ {
+ "epoch": 0.44,
+ "learning_rate": "1.7543e-04",
+ "loss": 1.0647,
+ "slid_loss": 1.079,
+ "step": 457,
+ "time": 13.35
+ },
+ {
+ "epoch": 0.44,
+ "learning_rate": "1.7582e-04",
+ "loss": 1.1903,
+ "slid_loss": 1.0801,
+ "step": 458,
+ "time": 12.78
+ },
+ {
+ "epoch": 0.44,
+ "learning_rate": "1.7620e-04",
+ "loss": 0.982,
+ "slid_loss": 1.0787,
+ "step": 459,
+ "time": 12.75
+ },
+ {
+ "epoch": 0.44,
+ "learning_rate": "1.7658e-04",
+ "loss": 1.1235,
+ "slid_loss": 1.08,
+ "step": 460,
+ "time": 13.4
+ },
+ {
+ "epoch": 0.44,
+ "learning_rate": "1.7697e-04",
+ "loss": 1.0177,
+ "slid_loss": 1.0795,
+ "step": 461,
+ "time": 13.67
+ },
+ {
+ "epoch": 0.44,
+ "learning_rate": "1.7735e-04",
+ "loss": 1.0484,
+ "slid_loss": 1.0776,
+ "step": 462,
+ "time": 13.0
+ },
+ {
+ "epoch": 0.44,
+ "learning_rate": "1.7774e-04",
+ "loss": 1.0731,
+ "slid_loss": 1.0769,
+ "step": 463,
+ "time": 14.16
+ },
+ {
+ "epoch": 0.45,
+ "learning_rate": "1.7812e-04",
+ "loss": 1.1799,
+ "slid_loss": 1.0777,
+ "step": 464,
+ "time": 13.77
+ },
+ {
+ "epoch": 0.45,
+ "learning_rate": "1.7850e-04",
+ "loss": 0.997,
+ "slid_loss": 1.0776,
+ "step": 465,
+ "time": 12.27
+ },
+ {
+ "epoch": 0.45,
+ "learning_rate": "1.7889e-04",
+ "loss": 1.2199,
+ "slid_loss": 1.0788,
+ "step": 466,
+ "time": 13.43
+ },
+ {
+ "epoch": 0.45,
+ "learning_rate": "1.7927e-04",
+ "loss": 1.0571,
+ "slid_loss": 1.0783,
+ "step": 467,
+ "time": 13.76
+ },
+ {
+ "epoch": 0.45,
+ "learning_rate": "1.7965e-04",
+ "loss": 1.006,
+ "slid_loss": 1.0777,
+ "step": 468,
+ "time": 14.02
+ },
+ {
+ "epoch": 0.45,
+ "learning_rate": "1.8004e-04",
+ "loss": 1.1346,
+ "slid_loss": 1.0787,
+ "step": 469,
+ "time": 11.23
+ },
+ {
+ "epoch": 0.45,
+ "learning_rate": "1.8042e-04",
+ "loss": 1.0168,
+ "slid_loss": 1.0768,
+ "step": 470,
+ "time": 11.57
+ },
+ {
+ "epoch": 0.45,
+ "learning_rate": "1.8081e-04",
+ "loss": 1.061,
+ "slid_loss": 1.0758,
+ "step": 471,
+ "time": 13.29
+ },
+ {
+ "epoch": 0.45,
+ "learning_rate": "1.8119e-04",
+ "loss": 1.1233,
+ "slid_loss": 1.0764,
+ "step": 472,
+ "time": 13.76
+ },
+ {
+ "epoch": 0.45,
+ "learning_rate": "1.8157e-04",
+ "loss": 1.1175,
+ "slid_loss": 1.0766,
+ "step": 473,
+ "time": 12.83
+ },
+ {
+ "epoch": 0.46,
+ "learning_rate": "1.8196e-04",
+ "loss": 1.074,
+ "slid_loss": 1.0775,
+ "step": 474,
+ "time": 11.96
+ },
+ {
+ "epoch": 0.46,
+ "learning_rate": "1.8234e-04",
+ "loss": 0.9853,
+ "slid_loss": 1.076,
+ "step": 475,
+ "time": 14.03
+ },
+ {
+ "epoch": 0.46,
+ "learning_rate": "1.8273e-04",
+ "loss": 1.0981,
+ "slid_loss": 1.0761,
+ "step": 476,
+ "time": 14.62
+ },
+ {
+ "epoch": 0.46,
+ "learning_rate": "1.8311e-04",
+ "loss": 1.0868,
+ "slid_loss": 1.0761,
+ "step": 477,
+ "time": 13.67
+ },
+ {
+ "epoch": 0.46,
+ "learning_rate": "1.8349e-04",
+ "loss": 1.0221,
+ "slid_loss": 1.0739,
+ "step": 478,
+ "time": 13.36
+ },
+ {
+ "epoch": 0.46,
+ "learning_rate": "1.8388e-04",
+ "loss": 1.1662,
+ "slid_loss": 1.0751,
+ "step": 479,
+ "time": 11.83
+ },
+ {
+ "epoch": 0.46,
+ "learning_rate": "1.8426e-04",
+ "loss": 1.0696,
+ "slid_loss": 1.0747,
+ "step": 480,
+ "time": 14.11
+ },
+ {
+ "epoch": 0.46,
+ "learning_rate": "1.8464e-04",
+ "loss": 0.9884,
+ "slid_loss": 1.0742,
+ "step": 481,
+ "time": 12.88
+ },
+ {
+ "epoch": 0.46,
+ "learning_rate": "1.8503e-04",
+ "loss": 1.0783,
+ "slid_loss": 1.0745,
+ "step": 482,
+ "time": 11.91
+ },
+ {
+ "epoch": 0.46,
+ "learning_rate": "1.8541e-04",
+ "loss": 1.0157,
+ "slid_loss": 1.0734,
+ "step": 483,
+ "time": 13.19
+ },
+ {
+ "epoch": 0.46,
+ "learning_rate": "1.8580e-04",
+ "loss": 1.1623,
+ "slid_loss": 1.0748,
+ "step": 484,
+ "time": 14.2
+ },
+ {
+ "epoch": 0.47,
+ "learning_rate": "1.8618e-04",
+ "loss": 1.0329,
+ "slid_loss": 1.0757,
+ "step": 485,
+ "time": 12.78
+ },
+ {
+ "epoch": 0.47,
+ "learning_rate": "1.8656e-04",
+ "loss": 1.0709,
+ "slid_loss": 1.0757,
+ "step": 486,
+ "time": 12.53
+ },
+ {
+ "epoch": 0.47,
+ "learning_rate": "1.8695e-04",
+ "loss": 1.0532,
+ "slid_loss": 1.0761,
+ "step": 487,
+ "time": 13.2
+ },
+ {
+ "epoch": 0.47,
+ "learning_rate": "1.8733e-04",
+ "loss": 1.0888,
+ "slid_loss": 1.0756,
+ "step": 488,
+ "time": 13.27
+ },
+ {
+ "epoch": 0.47,
+ "learning_rate": "1.8772e-04",
+ "loss": 0.9069,
+ "slid_loss": 1.0742,
+ "step": 489,
+ "time": 14.26
+ },
+ {
+ "epoch": 0.47,
+ "learning_rate": "1.8810e-04",
+ "loss": 0.9678,
+ "slid_loss": 1.0733,
+ "step": 490,
+ "time": 13.49
+ },
+ {
+ "epoch": 0.47,
+ "learning_rate": "1.8848e-04",
+ "loss": 0.9874,
+ "slid_loss": 1.0731,
+ "step": 491,
+ "time": 11.78
+ },
+ {
+ "epoch": 0.47,
+ "learning_rate": "1.8887e-04",
+ "loss": 1.1192,
+ "slid_loss": 1.0744,
+ "step": 492,
+ "time": 13.67
+ },
+ {
+ "epoch": 0.47,
+ "learning_rate": "1.8925e-04",
+ "loss": 1.0061,
+ "slid_loss": 1.0743,
+ "step": 493,
+ "time": 12.87
+ },
+ {
+ "epoch": 0.47,
+ "learning_rate": "1.8964e-04",
+ "loss": 0.9751,
+ "slid_loss": 1.0723,
+ "step": 494,
+ "time": 12.87
+ },
+ {
+ "epoch": 0.48,
+ "learning_rate": "1.9002e-04",
+ "loss": 1.1274,
+ "slid_loss": 1.0727,
+ "step": 495,
+ "time": 13.94
+ },
+ {
+ "epoch": 0.48,
+ "learning_rate": "1.9040e-04",
+ "loss": 1.0814,
+ "slid_loss": 1.0739,
+ "step": 496,
+ "time": 13.4
+ },
+ {
+ "epoch": 0.48,
+ "learning_rate": "1.9079e-04",
+ "loss": 1.0962,
+ "slid_loss": 1.0743,
+ "step": 497,
+ "time": 13.36
+ },
+ {
+ "epoch": 0.48,
+ "learning_rate": "1.9117e-04",
+ "loss": 1.117,
+ "slid_loss": 1.075,
+ "step": 498,
+ "time": 13.34
+ },
+ {
+ "epoch": 0.48,
+ "learning_rate": "1.9155e-04",
+ "loss": 1.0042,
+ "slid_loss": 1.0751,
+ "step": 499,
+ "time": 12.48
+ },
+ {
+ "epoch": 0.48,
+ "learning_rate": "1.9194e-04",
+ "loss": 0.9939,
+ "slid_loss": 1.0744,
+ "step": 500,
+ "time": 12.04
+ },
+ {
+ "epoch": 0.48,
+ "learning_rate": "1.9232e-04",
+ "loss": 0.9867,
+ "slid_loss": 1.0725,
+ "step": 501,
+ "time": 12.97
+ },
+ {
+ "epoch": 0.48,
+ "learning_rate": "1.9271e-04",
+ "loss": 1.053,
+ "slid_loss": 1.0724,
+ "step": 502,
+ "time": 13.34
+ },
+ {
+ "epoch": 0.48,
+ "learning_rate": "1.9309e-04",
+ "loss": 1.1517,
+ "slid_loss": 1.0732,
+ "step": 503,
+ "time": 11.25
+ },
+ {
+ "epoch": 0.48,
+ "learning_rate": "1.9347e-04",
+ "loss": 1.1175,
+ "slid_loss": 1.0725,
+ "step": 504,
+ "time": 13.36
+ },
+ {
+ "epoch": 0.49,
+ "learning_rate": "1.9386e-04",
+ "loss": 0.9554,
+ "slid_loss": 1.0708,
+ "step": 505,
+ "time": 10.67
+ },
+ {
+ "epoch": 0.49,
+ "learning_rate": "1.9424e-04",
+ "loss": 1.0071,
+ "slid_loss": 1.0693,
+ "step": 506,
+ "time": 11.57
+ },
+ {
+ "epoch": 0.49,
+ "learning_rate": "1.9463e-04",
+ "loss": 1.0807,
+ "slid_loss": 1.0687,
+ "step": 507,
+ "time": 12.99
+ },
+ {
+ "epoch": 0.49,
+ "learning_rate": "1.9501e-04",
+ "loss": 1.1357,
+ "slid_loss": 1.069,
+ "step": 508,
+ "time": 13.84
+ },
+ {
+ "epoch": 0.49,
+ "learning_rate": "1.9539e-04",
+ "loss": 1.1089,
+ "slid_loss": 1.0682,
+ "step": 509,
+ "time": 13.2
+ },
+ {
+ "epoch": 0.49,
+ "learning_rate": "1.9578e-04",
+ "loss": 1.123,
+ "slid_loss": 1.0697,
+ "step": 510,
+ "time": 14.04
+ },
+ {
+ "epoch": 0.49,
+ "learning_rate": "1.9616e-04",
+ "loss": 1.018,
+ "slid_loss": 1.0686,
+ "step": 511,
+ "time": 14.48
+ },
+ {
+ "epoch": 0.49,
+ "learning_rate": "1.9655e-04",
+ "loss": 1.0768,
+ "slid_loss": 1.0677,
+ "step": 512,
+ "time": 12.95
+ },
+ {
+ "epoch": 0.49,
+ "learning_rate": "1.9693e-04",
+ "loss": 1.1262,
+ "slid_loss": 1.0675,
+ "step": 513,
+ "time": 13.41
+ },
+ {
+ "epoch": 0.49,
+ "learning_rate": "1.9731e-04",
+ "loss": 1.1045,
+ "slid_loss": 1.0677,
+ "step": 514,
+ "time": 12.71
+ },
+ {
+ "epoch": 0.49,
+ "learning_rate": "1.9770e-04",
+ "loss": 1.1387,
+ "slid_loss": 1.0689,
+ "step": 515,
+ "time": 12.43
+ },
+ {
+ "epoch": 0.5,
+ "learning_rate": "1.9808e-04",
+ "loss": 1.0626,
+ "slid_loss": 1.0685,
+ "step": 516,
+ "time": 14.1
+ },
+ {
+ "epoch": 0.5,
+ "learning_rate": "1.9846e-04",
+ "loss": 1.0563,
+ "slid_loss": 1.0687,
+ "step": 517,
+ "time": 13.01
+ },
+ {
+ "epoch": 0.5,
+ "learning_rate": "1.9885e-04",
+ "loss": 1.0713,
+ "slid_loss": 1.0681,
+ "step": 518,
+ "time": 12.31
+ },
+ {
+ "epoch": 0.5,
+ "learning_rate": "1.9923e-04",
+ "loss": 1.1455,
+ "slid_loss": 1.0701,
+ "step": 519,
+ "time": 12.91
+ },
+ {
+ "epoch": 0.5,
+ "learning_rate": "1.9962e-04",
+ "loss": 1.0585,
+ "slid_loss": 1.0691,
+ "step": 520,
+ "time": 13.81
+ },
+ {
+ "epoch": 0.5,
+ "learning_rate": "2.0000e-04",
+ "loss": 0.9356,
+ "slid_loss": 1.0684,
+ "step": 521,
+ "time": 13.82
+ },
+ {
+ "epoch": 0.5,
+ "learning_rate": "2.0000e-04",
+ "loss": 1.0925,
+ "slid_loss": 1.0681,
+ "step": 522,
+ "time": 13.31
+ },
+ {
+ "epoch": 0.5,
+ "learning_rate": "2.0000e-04",
+ "loss": 1.1168,
+ "slid_loss": 1.0688,
+ "step": 523,
+ "time": 13.47
+ },
+ {
+ "epoch": 0.5,
+ "learning_rate": "2.0000e-04",
+ "loss": 1.1304,
+ "slid_loss": 1.069,
+ "step": 524,
+ "time": 13.76
+ },
+ {
+ "epoch": 0.5,
+ "learning_rate": "2.0000e-04",
+ "loss": 1.02,
+ "slid_loss": 1.0685,
+ "step": 525,
+ "time": 11.38
+ },
+ {
+ "epoch": 0.51,
+ "learning_rate": "2.0000e-04",
+ "loss": 1.1233,
+ "slid_loss": 1.0691,
+ "step": 526,
+ "time": 13.95
+ },
+ {
+ "epoch": 0.51,
+ "learning_rate": "2.0000e-04",
+ "loss": 1.179,
+ "slid_loss": 1.07,
+ "step": 527,
+ "time": 13.7
+ },
+ {
+ "epoch": 0.51,
+ "learning_rate": "2.0000e-04",
+ "loss": 1.0579,
+ "slid_loss": 1.0707,
+ "step": 528,
+ "time": 12.8
+ },
+ {
+ "epoch": 0.51,
+ "learning_rate": "2.0000e-04",
+ "loss": 1.0605,
+ "slid_loss": 1.0709,
+ "step": 529,
+ "time": 13.56
+ },
+ {
+ "epoch": 0.51,
+ "learning_rate": "2.0000e-04",
+ "loss": 1.1178,
+ "slid_loss": 1.0707,
+ "step": 530,
+ "time": 13.9
+ },
+ {
+ "epoch": 0.51,
+ "learning_rate": "2.0000e-04",
+ "loss": 1.0711,
+ "slid_loss": 1.0717,
+ "step": 531,
+ "time": 12.17
+ },
+ {
+ "epoch": 0.51,
+ "learning_rate": "2.0000e-04",
+ "loss": 0.9443,
+ "slid_loss": 1.0699,
+ "step": 532,
+ "time": 11.7
+ },
+ {
+ "epoch": 0.51,
+ "learning_rate": "2.0000e-04",
+ "loss": 0.9551,
+ "slid_loss": 1.0689,
+ "step": 533,
+ "time": 13.55
+ },
+ {
+ "epoch": 0.51,
+ "learning_rate": "2.0000e-04",
+ "loss": 1.0341,
+ "slid_loss": 1.0691,
+ "step": 534,
+ "time": 11.13
+ },
+ {
+ "epoch": 0.51,
+ "learning_rate": "2.0000e-04",
+ "loss": 1.0816,
+ "slid_loss": 1.0696,
+ "step": 535,
+ "time": 12.56
+ },
+ {
+ "epoch": 0.51,
+ "learning_rate": "2.0000e-04",
+ "loss": 1.0912,
+ "slid_loss": 1.0695,
+ "step": 536,
+ "time": 13.71
+ },
+ {
+ "epoch": 0.52,
+ "learning_rate": "2.0000e-04",
+ "loss": 1.0296,
+ "slid_loss": 1.0679,
+ "step": 537,
+ "time": 14.05
+ },
+ {
+ "epoch": 0.52,
+ "learning_rate": "2.0000e-04",
+ "loss": 1.0475,
+ "slid_loss": 1.0666,
+ "step": 538,
+ "time": 10.59
+ },
+ {
+ "epoch": 0.52,
+ "learning_rate": "2.0000e-04",
+ "loss": 0.9315,
+ "slid_loss": 1.0667,
+ "step": 539,
+ "time": 13.78
+ },
+ {
+ "epoch": 0.52,
+ "learning_rate": "2.0000e-04",
+ "loss": 1.018,
+ "slid_loss": 1.0661,
+ "step": 540,
+ "time": 14.04
+ },
+ {
+ "epoch": 0.52,
+ "learning_rate": "2.0000e-04",
+ "loss": 1.0196,
+ "slid_loss": 1.0659,
+ "step": 541,
+ "time": 12.37
+ },
+ {
+ "epoch": 0.52,
+ "learning_rate": "2.0000e-04",
+ "loss": 1.0688,
+ "slid_loss": 1.067,
+ "step": 542,
+ "time": 14.49
+ },
+ {
+ "epoch": 0.52,
+ "learning_rate": "1.9999e-04",
+ "loss": 1.1505,
+ "slid_loss": 1.0683,
+ "step": 543,
+ "time": 14.0
+ },
+ {
+ "epoch": 0.52,
+ "learning_rate": "1.9999e-04",
+ "loss": 1.1506,
+ "slid_loss": 1.068,
+ "step": 544,
+ "time": 13.73
+ },
+ {
+ "epoch": 0.52,
+ "learning_rate": "1.9999e-04",
+ "loss": 1.199,
+ "slid_loss": 1.0699,
+ "step": 545,
+ "time": 12.84
+ },
+ {
+ "epoch": 0.52,
+ "learning_rate": "1.9999e-04",
+ "loss": 1.0458,
+ "slid_loss": 1.0686,
+ "step": 546,
+ "time": 12.29
+ },
+ {
+ "epoch": 0.53,
+ "learning_rate": "1.9999e-04",
+ "loss": 0.9727,
+ "slid_loss": 1.0674,
+ "step": 547,
+ "time": 14.12
+ },
+ {
+ "epoch": 0.53,
+ "learning_rate": "1.9999e-04",
+ "loss": 1.1303,
+ "slid_loss": 1.0673,
+ "step": 548,
+ "time": 12.39
+ },
+ {
+ "epoch": 0.53,
+ "learning_rate": "1.9999e-04",
+ "loss": 1.0758,
+ "slid_loss": 1.0675,
+ "step": 549,
+ "time": 11.91
+ },
+ {
+ "epoch": 0.53,
+ "learning_rate": "1.9999e-04",
+ "loss": 1.0393,
+ "slid_loss": 1.0674,
+ "step": 550,
+ "time": 12.28
+ },
+ {
+ "epoch": 0.53,
+ "learning_rate": "1.9999e-04",
+ "loss": 1.1124,
+ "slid_loss": 1.0669,
+ "step": 551,
+ "time": 13.45
+ },
+ {
+ "epoch": 0.53,
+ "learning_rate": "1.9999e-04",
+ "loss": 0.9769,
+ "slid_loss": 1.0655,
+ "step": 552,
+ "time": 13.26
+ },
+ {
+ "epoch": 0.53,
+ "learning_rate": "1.9999e-04",
+ "loss": 1.0826,
+ "slid_loss": 1.0662,
+ "step": 553,
+ "time": 13.78
+ },
+ {
+ "epoch": 0.53,
+ "learning_rate": "1.9999e-04",
+ "loss": 1.0468,
+ "slid_loss": 1.0659,
+ "step": 554,
+ "time": 14.07
+ },
+ {
+ "epoch": 0.53,
+ "learning_rate": "1.9999e-04",
+ "loss": 1.1096,
+ "slid_loss": 1.0657,
+ "step": 555,
+ "time": 11.69
+ },
+ {
+ "epoch": 0.53,
+ "learning_rate": "1.9999e-04",
+ "loss": 1.0225,
+ "slid_loss": 1.0661,
+ "step": 556,
+ "time": 13.78
+ },
+ {
+ "epoch": 0.54,
+ "learning_rate": "1.9999e-04",
+ "loss": 1.1107,
+ "slid_loss": 1.0665,
+ "step": 557,
+ "time": 13.5
+ },
+ {
+ "epoch": 0.54,
+ "learning_rate": "1.9998e-04",
+ "loss": 1.0891,
+ "slid_loss": 1.0655,
+ "step": 558,
+ "time": 12.46
+ },
+ {
+ "epoch": 0.54,
+ "learning_rate": "1.9998e-04",
+ "loss": 1.0006,
+ "slid_loss": 1.0657,
+ "step": 559,
+ "time": 13.51
+ },
+ {
+ "epoch": 0.54,
+ "learning_rate": "1.9998e-04",
+ "loss": 1.0981,
+ "slid_loss": 1.0655,
+ "step": 560,
+ "time": 11.66
+ },
+ {
+ "epoch": 0.54,
+ "learning_rate": "1.9998e-04",
+ "loss": 1.0809,
+ "slid_loss": 1.0661,
+ "step": 561,
+ "time": 11.55
+ },
+ {
+ "epoch": 0.54,
+ "learning_rate": "1.9998e-04",
+ "loss": 1.0517,
+ "slid_loss": 1.0661,
+ "step": 562,
+ "time": 13.77
+ },
+ {
+ "epoch": 0.54,
+ "learning_rate": "1.9998e-04",
+ "loss": 0.9225,
+ "slid_loss": 1.0646,
+ "step": 563,
+ "time": 13.75
+ },
+ {
+ "epoch": 0.54,
+ "learning_rate": "1.9998e-04",
+ "loss": 1.0561,
+ "slid_loss": 1.0634,
+ "step": 564,
+ "time": 12.71
+ },
+ {
+ "epoch": 0.54,
+ "learning_rate": "1.9998e-04",
+ "loss": 1.103,
+ "slid_loss": 1.0644,
+ "step": 565,
+ "time": 13.82
+ },
+ {
+ "epoch": 0.54,
+ "learning_rate": "1.9998e-04",
+ "loss": 0.9465,
+ "slid_loss": 1.0617,
+ "step": 566,
+ "time": 13.9
+ },
+ {
+ "epoch": 0.54,
+ "learning_rate": "1.9998e-04",
+ "loss": 0.9362,
+ "slid_loss": 1.0605,
+ "step": 567,
+ "time": 12.74
+ },
+ {
+ "epoch": 0.55,
+ "learning_rate": "1.9998e-04",
+ "loss": 1.0916,
+ "slid_loss": 1.0613,
+ "step": 568,
+ "time": 13.12
+ },
+ {
+ "epoch": 0.55,
+ "learning_rate": "1.9997e-04",
+ "loss": 1.073,
+ "slid_loss": 1.0607,
+ "step": 569,
+ "time": 13.44
+ },
+ {
+ "epoch": 0.55,
+ "learning_rate": "1.9997e-04",
+ "loss": 0.9971,
+ "slid_loss": 1.0605,
+ "step": 570,
+ "time": 13.47
+ },
+ {
+ "epoch": 0.55,
+ "learning_rate": "1.9997e-04",
+ "loss": 1.0541,
+ "slid_loss": 1.0605,
+ "step": 571,
+ "time": 13.37
+ },
+ {
+ "epoch": 0.55,
+ "learning_rate": "1.9997e-04",
+ "loss": 1.0016,
+ "slid_loss": 1.0592,
+ "step": 572,
+ "time": 13.34
+ },
+ {
+ "epoch": 0.55,
+ "learning_rate": "1.9997e-04",
+ "loss": 1.0541,
+ "slid_loss": 1.0586,
+ "step": 573,
+ "time": 12.87
+ },
+ {
+ "epoch": 0.55,
+ "learning_rate": "1.9997e-04",
+ "loss": 0.962,
+ "slid_loss": 1.0575,
+ "step": 574,
+ "time": 13.98
+ },
+ {
+ "epoch": 0.55,
+ "learning_rate": "1.9997e-04",
+ "loss": 0.9362,
+ "slid_loss": 1.057,
+ "step": 575,
+ "time": 13.67
+ },
+ {
+ "epoch": 0.55,
+ "learning_rate": "1.9997e-04",
+ "loss": 0.9888,
+ "slid_loss": 1.0559,
+ "step": 576,
+ "time": 13.43
+ },
+ {
+ "epoch": 0.55,
+ "learning_rate": "1.9996e-04",
+ "loss": 1.0207,
+ "slid_loss": 1.0553,
+ "step": 577,
+ "time": 12.7
+ },
+ {
+ "epoch": 0.56,
+ "learning_rate": "1.9996e-04",
+ "loss": 1.1892,
+ "slid_loss": 1.0569,
+ "step": 578,
+ "time": 13.65
+ },
+ {
+ "epoch": 0.56,
+ "learning_rate": "1.9996e-04",
+ "loss": 1.0915,
+ "slid_loss": 1.0562,
+ "step": 579,
+ "time": 13.33
+ },
+ {
+ "epoch": 0.56,
+ "learning_rate": "1.9996e-04",
+ "loss": 0.9794,
+ "slid_loss": 1.0553,
+ "step": 580,
+ "time": 13.23
+ },
+ {
+ "epoch": 0.56,
+ "learning_rate": "1.9996e-04",
+ "loss": 1.0321,
+ "slid_loss": 1.0557,
+ "step": 581,
+ "time": 13.89
+ },
+ {
+ "epoch": 0.56,
+ "learning_rate": "1.9996e-04",
+ "loss": 1.0096,
+ "slid_loss": 1.055,
+ "step": 582,
+ "time": 12.89
+ },
+ {
+ "epoch": 0.56,
+ "learning_rate": "1.9996e-04",
+ "loss": 1.1575,
+ "slid_loss": 1.0564,
+ "step": 583,
+ "time": 13.83
+ },
+ {
+ "epoch": 0.56,
+ "learning_rate": "1.9996e-04",
+ "loss": 1.1859,
+ "slid_loss": 1.0567,
+ "step": 584,
+ "time": 13.31
+ },
+ {
+ "epoch": 0.56,
+ "learning_rate": "1.9995e-04",
+ "loss": 0.9553,
+ "slid_loss": 1.0559,
+ "step": 585,
+ "time": 14.0
+ },
+ {
+ "epoch": 0.56,
+ "learning_rate": "1.9995e-04",
+ "loss": 0.9196,
+ "slid_loss": 1.0544,
+ "step": 586,
+ "time": 12.04
+ },
+ {
+ "epoch": 0.56,
+ "learning_rate": "1.9995e-04",
+ "loss": 1.0118,
+ "slid_loss": 1.054,
+ "step": 587,
+ "time": 12.94
+ },
+ {
+ "epoch": 0.56,
+ "learning_rate": "1.9995e-04",
+ "loss": 1.0194,
+ "slid_loss": 1.0533,
+ "step": 588,
+ "time": 13.22
+ },
+ {
+ "epoch": 0.57,
+ "learning_rate": "1.9995e-04",
+ "loss": 1.0106,
+ "slid_loss": 1.0543,
+ "step": 589,
+ "time": 12.11
+ },
+ {
+ "epoch": 0.57,
+ "learning_rate": "1.9995e-04",
+ "loss": 0.9009,
+ "slid_loss": 1.0536,
+ "step": 590,
+ "time": 13.16
+ },
+ {
+ "epoch": 0.57,
+ "learning_rate": "1.9994e-04",
+ "loss": 0.9611,
+ "slid_loss": 1.0534,
+ "step": 591,
+ "time": 12.29
+ },
+ {
+ "epoch": 0.57,
+ "learning_rate": "1.9994e-04",
+ "loss": 0.9321,
+ "slid_loss": 1.0515,
+ "step": 592,
+ "time": 13.22
+ },
+ {
+ "epoch": 0.57,
+ "learning_rate": "1.9994e-04",
+ "loss": 0.9615,
+ "slid_loss": 1.0511,
+ "step": 593,
+ "time": 14.84
+ },
+ {
+ "epoch": 0.57,
+ "learning_rate": "1.9994e-04",
+ "loss": 0.9307,
+ "slid_loss": 1.0506,
+ "step": 594,
+ "time": 11.31
+ },
+ {
+ "epoch": 0.57,
+ "learning_rate": "1.9994e-04",
+ "loss": 1.1413,
+ "slid_loss": 1.0508,
+ "step": 595,
+ "time": 13.46
+ },
+ {
+ "epoch": 0.57,
+ "learning_rate": "1.9994e-04",
+ "loss": 1.0796,
+ "slid_loss": 1.0507,
+ "step": 596,
+ "time": 12.75
+ },
+ {
+ "epoch": 0.57,
+ "learning_rate": "1.9994e-04",
+ "loss": 0.9417,
+ "slid_loss": 1.0492,
+ "step": 597,
+ "time": 13.29
+ },
+ {
+ "epoch": 0.57,
+ "learning_rate": "1.9993e-04",
+ "loss": 1.0056,
+ "slid_loss": 1.0481,
+ "step": 598,
+ "time": 13.73
+ },
+ {
+ "epoch": 0.58,
+ "learning_rate": "1.9993e-04",
+ "loss": 1.0508,
+ "slid_loss": 1.0486,
+ "step": 599,
+ "time": 13.25
+ },
+ {
+ "epoch": 0.58,
+ "learning_rate": "1.9993e-04",
+ "loss": 1.0532,
+ "slid_loss": 1.0491,
+ "step": 600,
+ "time": 14.14
+ },
+ {
+ "epoch": 0.58,
+ "learning_rate": "1.9993e-04",
+ "loss": 1.1259,
+ "slid_loss": 1.0505,
+ "step": 601,
+ "time": 13.97
+ },
+ {
+ "epoch": 0.58,
+ "learning_rate": "1.9993e-04",
+ "loss": 1.0563,
+ "slid_loss": 1.0506,
+ "step": 602,
+ "time": 13.79
+ },
+ {
+ "epoch": 0.58,
+ "learning_rate": "1.9992e-04",
+ "loss": 1.0944,
+ "slid_loss": 1.05,
+ "step": 603,
+ "time": 12.36
+ },
+ {
+ "epoch": 0.58,
+ "learning_rate": "1.9992e-04",
+ "loss": 1.0569,
+ "slid_loss": 1.0494,
+ "step": 604,
+ "time": 12.2
+ },
+ {
+ "epoch": 0.58,
+ "learning_rate": "1.9992e-04",
+ "loss": 1.0496,
+ "slid_loss": 1.0503,
+ "step": 605,
+ "time": 13.44
+ },
+ {
+ "epoch": 0.58,
+ "learning_rate": "1.9992e-04",
+ "loss": 1.0248,
+ "slid_loss": 1.0505,
+ "step": 606,
+ "time": 12.19
+ },
+ {
+ "epoch": 0.58,
+ "learning_rate": "1.9992e-04",
+ "loss": 1.0156,
+ "slid_loss": 1.0499,
+ "step": 607,
+ "time": 12.83
+ },
+ {
+ "epoch": 0.58,
+ "learning_rate": "1.9991e-04",
+ "loss": 1.1058,
+ "slid_loss": 1.0496,
+ "step": 608,
+ "time": 14.04
+ },
+ {
+ "epoch": 0.59,
+ "learning_rate": "1.9991e-04",
+ "loss": 1.1543,
+ "slid_loss": 1.05,
+ "step": 609,
+ "time": 13.7
+ },
+ {
+ "epoch": 0.59,
+ "learning_rate": "1.9991e-04",
+ "loss": 1.0371,
+ "slid_loss": 1.0492,
+ "step": 610,
+ "time": 14.38
+ },
+ {
+ "epoch": 0.59,
+ "learning_rate": "1.9991e-04",
+ "loss": 1.11,
+ "slid_loss": 1.0501,
+ "step": 611,
+ "time": 13.64
+ },
+ {
+ "epoch": 0.59,
+ "learning_rate": "1.9991e-04",
+ "loss": 1.0472,
+ "slid_loss": 1.0498,
+ "step": 612,
+ "time": 13.86
+ },
+ {
+ "epoch": 0.59,
+ "learning_rate": "1.9990e-04",
+ "loss": 1.0337,
+ "slid_loss": 1.0489,
+ "step": 613,
+ "time": 12.24
+ },
+ {
+ "epoch": 0.59,
+ "learning_rate": "1.9990e-04",
+ "loss": 0.9967,
+ "slid_loss": 1.0478,
+ "step": 614,
+ "time": 11.89
+ },
+ {
+ "epoch": 0.59,
+ "learning_rate": "1.9990e-04",
+ "loss": 1.036,
+ "slid_loss": 1.0468,
+ "step": 615,
+ "time": 13.44
+ },
+ {
+ "epoch": 0.59,
+ "learning_rate": "1.9990e-04",
+ "loss": 0.988,
+ "slid_loss": 1.046,
+ "step": 616,
+ "time": 11.66
+ },
+ {
+ "epoch": 0.59,
+ "learning_rate": "1.9990e-04",
+ "loss": 1.01,
+ "slid_loss": 1.0455,
+ "step": 617,
+ "time": 13.3
+ },
+ {
+ "epoch": 0.59,
+ "learning_rate": "1.9989e-04",
+ "loss": 1.0468,
+ "slid_loss": 1.0453,
+ "step": 618,
+ "time": 14.29
+ },
+ {
+ "epoch": 0.59,
+ "learning_rate": "1.9989e-04",
+ "loss": 1.0455,
+ "slid_loss": 1.0443,
+ "step": 619,
+ "time": 13.07
+ },
+ {
+ "epoch": 0.6,
+ "learning_rate": "1.9989e-04",
+ "loss": 1.0149,
+ "slid_loss": 1.0439,
+ "step": 620,
+ "time": 12.87
+ },
+ {
+ "epoch": 0.6,
+ "learning_rate": "1.9989e-04",
+ "loss": 1.116,
+ "slid_loss": 1.0457,
+ "step": 621,
+ "time": 12.83
+ },
+ {
+ "epoch": 0.6,
+ "learning_rate": "1.9989e-04",
+ "loss": 1.0674,
+ "slid_loss": 1.0454,
+ "step": 622,
+ "time": 13.72
+ },
+ {
+ "epoch": 0.6,
+ "learning_rate": "1.9988e-04",
+ "loss": 0.9946,
+ "slid_loss": 1.0442,
+ "step": 623,
+ "time": 14.18
+ },
+ {
+ "epoch": 0.6,
+ "learning_rate": "1.9988e-04",
+ "loss": 0.946,
+ "slid_loss": 1.0423,
+ "step": 624,
+ "time": 12.69
+ },
+ {
+ "epoch": 0.6,
+ "learning_rate": "1.9988e-04",
+ "loss": 1.0845,
+ "slid_loss": 1.043,
+ "step": 625,
+ "time": 13.4
+ },
+ {
+ "epoch": 0.6,
+ "learning_rate": "1.9988e-04",
+ "loss": 1.0035,
+ "slid_loss": 1.0418,
+ "step": 626,
+ "time": 12.73
+ },
+ {
+ "epoch": 0.6,
+ "learning_rate": "1.9987e-04",
+ "loss": 1.0176,
+ "slid_loss": 1.0402,
+ "step": 627,
+ "time": 12.94
+ },
+ {
+ "epoch": 0.6,
+ "learning_rate": "1.9987e-04",
+ "loss": 1.0003,
+ "slid_loss": 1.0396,
+ "step": 628,
+ "time": 11.22
+ },
+ {
+ "epoch": 0.6,
+ "learning_rate": "1.9987e-04",
+ "loss": 1.0599,
+ "slid_loss": 1.0396,
+ "step": 629,
+ "time": 12.88
+ },
+ {
+ "epoch": 0.61,
+ "learning_rate": "1.9987e-04",
+ "loss": 1.1429,
+ "slid_loss": 1.0398,
+ "step": 630,
+ "time": 13.28
+ },
+ {
+ "epoch": 0.61,
+ "learning_rate": "1.9986e-04",
+ "loss": 0.9407,
+ "slid_loss": 1.0385,
+ "step": 631,
+ "time": 11.73
+ },
+ {
+ "epoch": 0.61,
+ "learning_rate": "1.9986e-04",
+ "loss": 1.0993,
+ "slid_loss": 1.0401,
+ "step": 632,
+ "time": 13.33
+ },
+ {
+ "epoch": 0.61,
+ "learning_rate": "1.9986e-04",
+ "loss": 1.0566,
+ "slid_loss": 1.0411,
+ "step": 633,
+ "time": 13.19
+ },
+ {
+ "epoch": 0.61,
+ "learning_rate": "1.9986e-04",
+ "loss": 0.9537,
+ "slid_loss": 1.0403,
+ "step": 634,
+ "time": 12.92
+ },
+ {
+ "epoch": 0.61,
+ "learning_rate": "1.9985e-04",
+ "loss": 1.1051,
+ "slid_loss": 1.0405,
+ "step": 635,
+ "time": 12.84
+ },
+ {
+ "epoch": 0.61,
+ "learning_rate": "1.9985e-04",
+ "loss": 1.0919,
+ "slid_loss": 1.0405,
+ "step": 636,
+ "time": 13.7
+ },
+ {
+ "epoch": 0.61,
+ "learning_rate": "1.9985e-04",
+ "loss": 0.9882,
+ "slid_loss": 1.0401,
+ "step": 637,
+ "time": 11.73
+ },
+ {
+ "epoch": 0.61,
+ "learning_rate": "1.9985e-04",
+ "loss": 1.1086,
+ "slid_loss": 1.0407,
+ "step": 638,
+ "time": 12.59
+ },
+ {
+ "epoch": 0.61,
+ "learning_rate": "1.9984e-04",
+ "loss": 1.0329,
+ "slid_loss": 1.0418,
+ "step": 639,
+ "time": 12.82
+ },
+ {
+ "epoch": 0.61,
+ "learning_rate": "1.9984e-04",
+ "loss": 1.0164,
+ "slid_loss": 1.0417,
+ "step": 640,
+ "time": 13.29
+ },
+ {
+ "epoch": 0.62,
+ "learning_rate": "1.9984e-04",
+ "loss": 1.0905,
+ "slid_loss": 1.0425,
+ "step": 641,
+ "time": 14.2
+ },
+ {
+ "epoch": 0.62,
+ "learning_rate": "1.9984e-04",
+ "loss": 1.127,
+ "slid_loss": 1.043,
+ "step": 642,
+ "time": 13.91
+ },
+ {
+ "epoch": 0.62,
+ "learning_rate": "1.9983e-04",
+ "loss": 1.1271,
+ "slid_loss": 1.0428,
+ "step": 643,
+ "time": 13.59
+ },
+ {
+ "epoch": 0.62,
+ "learning_rate": "1.9983e-04",
+ "loss": 1.078,
+ "slid_loss": 1.0421,
+ "step": 644,
+ "time": 13.05
+ },
+ {
+ "epoch": 0.62,
+ "learning_rate": "1.9983e-04",
+ "loss": 1.0316,
+ "slid_loss": 1.0404,
+ "step": 645,
+ "time": 13.47
+ },
+ {
+ "epoch": 0.62,
+ "learning_rate": "1.9982e-04",
+ "loss": 0.9629,
+ "slid_loss": 1.0396,
+ "step": 646,
+ "time": 12.76
+ },
+ {
+ "epoch": 0.62,
+ "learning_rate": "1.9982e-04",
+ "loss": 0.9961,
+ "slid_loss": 1.0398,
+ "step": 647,
+ "time": 13.45
+ },
+ {
+ "epoch": 0.62,
+ "learning_rate": "1.9982e-04",
+ "loss": 1.0788,
+ "slid_loss": 1.0393,
+ "step": 648,
+ "time": 11.75
+ },
+ {
+ "epoch": 0.62,
+ "learning_rate": "1.9982e-04",
+ "loss": 1.0963,
+ "slid_loss": 1.0395,
+ "step": 649,
+ "time": 11.44
+ },
+ {
+ "epoch": 0.62,
+ "learning_rate": "1.9981e-04",
+ "loss": 1.0878,
+ "slid_loss": 1.04,
+ "step": 650,
+ "time": 13.24
+ },
+ {
+ "epoch": 0.63,
+ "learning_rate": "1.9981e-04",
+ "loss": 1.0527,
+ "slid_loss": 1.0394,
+ "step": 651,
+ "time": 12.81
+ },
+ {
+ "epoch": 0.63,
+ "learning_rate": "1.9981e-04",
+ "loss": 0.9772,
+ "slid_loss": 1.0394,
+ "step": 652,
+ "time": 13.61
+ },
+ {
+ "epoch": 0.63,
+ "learning_rate": "1.9980e-04",
+ "loss": 1.085,
+ "slid_loss": 1.0394,
+ "step": 653,
+ "time": 13.82
+ },
+ {
+ "epoch": 0.63,
+ "learning_rate": "1.9980e-04",
+ "loss": 1.0466,
+ "slid_loss": 1.0394,
+ "step": 654,
+ "time": 13.59
+ },
+ {
+ "epoch": 0.63,
+ "learning_rate": "1.9980e-04",
+ "loss": 1.0012,
+ "slid_loss": 1.0383,
+ "step": 655,
+ "time": 11.44
+ },
+ {
+ "epoch": 0.63,
+ "learning_rate": "1.9980e-04",
+ "loss": 1.0059,
+ "slid_loss": 1.0382,
+ "step": 656,
+ "time": 13.54
+ },
+ {
+ "epoch": 0.63,
+ "learning_rate": "1.9979e-04",
+ "loss": 1.0011,
+ "slid_loss": 1.0371,
+ "step": 657,
+ "time": 13.73
+ },
+ {
+ "epoch": 0.63,
+ "learning_rate": "1.9979e-04",
+ "loss": 1.0159,
+ "slid_loss": 1.0363,
+ "step": 658,
+ "time": 12.09
+ },
+ {
+ "epoch": 0.63,
+ "learning_rate": "1.9979e-04",
+ "loss": 1.1605,
+ "slid_loss": 1.0379,
+ "step": 659,
+ "time": 13.28
+ },
+ {
+ "epoch": 0.63,
+ "learning_rate": "1.9978e-04",
+ "loss": 1.0338,
+ "slid_loss": 1.0373,
+ "step": 660,
+ "time": 12.07
+ },
+ {
+ "epoch": 0.63,
+ "learning_rate": "1.9978e-04",
+ "loss": 1.0183,
+ "slid_loss": 1.0367,
+ "step": 661,
+ "time": 13.55
+ },
+ {
+ "epoch": 0.64,
+ "learning_rate": "1.9978e-04",
+ "loss": 0.998,
+ "slid_loss": 1.0361,
+ "step": 662,
+ "time": 13.95
+ },
+ {
+ "epoch": 0.64,
+ "learning_rate": "1.9977e-04",
+ "loss": 1.0519,
+ "slid_loss": 1.0374,
+ "step": 663,
+ "time": 12.42
+ },
+ {
+ "epoch": 0.64,
+ "learning_rate": "1.9977e-04",
+ "loss": 1.134,
+ "slid_loss": 1.0382,
+ "step": 664,
+ "time": 13.4
+ },
+ {
+ "epoch": 0.64,
+ "learning_rate": "1.9977e-04",
+ "loss": 1.012,
+ "slid_loss": 1.0373,
+ "step": 665,
+ "time": 13.24
+ },
+ {
+ "epoch": 0.64,
+ "learning_rate": "1.9976e-04",
+ "loss": 0.9695,
+ "slid_loss": 1.0375,
+ "step": 666,
+ "time": 12.78
+ },
+ {
+ "epoch": 0.64,
+ "learning_rate": "1.9976e-04",
+ "loss": 0.9398,
+ "slid_loss": 1.0376,
+ "step": 667,
+ "time": 13.42
+ },
+ {
+ "epoch": 0.64,
+ "learning_rate": "1.9976e-04",
+ "loss": 0.9892,
+ "slid_loss": 1.0365,
+ "step": 668,
+ "time": 12.93
+ },
+ {
+ "epoch": 0.64,
+ "learning_rate": "1.9975e-04",
+ "loss": 1.0776,
+ "slid_loss": 1.0366,
+ "step": 669,
+ "time": 13.41
+ },
+ {
+ "epoch": 0.64,
+ "learning_rate": "1.9975e-04",
+ "loss": 0.9244,
+ "slid_loss": 1.0358,
+ "step": 670,
+ "time": 14.82
+ },
+ {
+ "epoch": 0.64,
+ "learning_rate": "1.9975e-04",
+ "loss": 1.0958,
+ "slid_loss": 1.0363,
+ "step": 671,
+ "time": 13.62
+ },
+ {
+ "epoch": 0.65,
+ "learning_rate": "1.9974e-04",
+ "loss": 1.11,
+ "slid_loss": 1.0373,
+ "step": 672,
+ "time": 10.89
+ },
+ {
+ "epoch": 0.65,
+ "learning_rate": "1.9974e-04",
+ "loss": 1.0657,
+ "slid_loss": 1.0375,
+ "step": 673,
+ "time": 13.87
+ },
+ {
+ "epoch": 0.65,
+ "learning_rate": "1.9974e-04",
+ "loss": 0.9387,
+ "slid_loss": 1.0372,
+ "step": 674,
+ "time": 13.0
+ },
+ {
+ "epoch": 0.65,
+ "learning_rate": "1.9973e-04",
+ "loss": 1.012,
+ "slid_loss": 1.038,
+ "step": 675,
+ "time": 14.32
+ },
+ {
+ "epoch": 0.65,
+ "learning_rate": "1.9973e-04",
+ "loss": 0.9642,
+ "slid_loss": 1.0377,
+ "step": 676,
+ "time": 12.75
+ },
+ {
+ "epoch": 0.65,
+ "learning_rate": "1.9973e-04",
+ "loss": 0.992,
+ "slid_loss": 1.0375,
+ "step": 677,
+ "time": 12.78
+ },
+ {
+ "epoch": 0.65,
+ "learning_rate": "1.9972e-04",
+ "loss": 1.1429,
+ "slid_loss": 1.037,
+ "step": 678,
+ "time": 12.87
+ },
+ {
+ "epoch": 0.65,
+ "learning_rate": "1.9972e-04",
+ "loss": 1.0107,
+ "slid_loss": 1.0362,
+ "step": 679,
+ "time": 13.95
+ },
+ {
+ "epoch": 0.65,
+ "learning_rate": "1.9972e-04",
+ "loss": 1.0233,
+ "slid_loss": 1.0366,
+ "step": 680,
+ "time": 13.34
+ },
+ {
+ "epoch": 0.65,
+ "learning_rate": "1.9971e-04",
+ "loss": 1.025,
+ "slid_loss": 1.0366,
+ "step": 681,
+ "time": 14.06
+ },
+ {
+ "epoch": 0.66,
+ "learning_rate": "1.9971e-04",
+ "loss": 0.8827,
+ "slid_loss": 1.0353,
+ "step": 682,
+ "time": 13.64
+ },
+ {
+ "epoch": 0.66,
+ "learning_rate": "1.9971e-04",
+ "loss": 1.0185,
+ "slid_loss": 1.0339,
+ "step": 683,
+ "time": 12.54
+ },
+ {
+ "epoch": 0.66,
+ "learning_rate": "1.9970e-04",
+ "loss": 1.0916,
+ "slid_loss": 1.0329,
+ "step": 684,
+ "time": 11.81
+ },
+ {
+ "epoch": 0.66,
+ "learning_rate": "1.9970e-04",
+ "loss": 1.1512,
+ "slid_loss": 1.0349,
+ "step": 685,
+ "time": 13.22
+ },
+ {
+ "epoch": 0.66,
+ "learning_rate": "1.9969e-04",
+ "loss": 1.033,
+ "slid_loss": 1.036,
+ "step": 686,
+ "time": 12.2
+ },
+ {
+ "epoch": 0.66,
+ "learning_rate": "1.9969e-04",
+ "loss": 1.0512,
+ "slid_loss": 1.0364,
+ "step": 687,
+ "time": 13.61
+ },
+ {
+ "epoch": 0.66,
+ "learning_rate": "1.9969e-04",
+ "loss": 1.001,
+ "slid_loss": 1.0363,
+ "step": 688,
+ "time": 12.99
+ },
+ {
+ "epoch": 0.66,
+ "learning_rate": "1.9968e-04",
+ "loss": 1.0962,
+ "slid_loss": 1.0371,
+ "step": 689,
+ "time": 13.4
+ },
+ {
+ "epoch": 0.66,
+ "learning_rate": "1.9968e-04",
+ "loss": 1.0694,
+ "slid_loss": 1.0388,
+ "step": 690,
+ "time": 13.43
+ },
+ {
+ "epoch": 0.66,
+ "learning_rate": "1.9968e-04",
+ "loss": 1.0784,
+ "slid_loss": 1.04,
+ "step": 691,
+ "time": 12.79
+ },
+ {
+ "epoch": 0.66,
+ "learning_rate": "1.9967e-04",
+ "loss": 1.014,
+ "slid_loss": 1.0408,
+ "step": 692,
+ "time": 13.63
+ },
+ {
+ "epoch": 0.67,
+ "learning_rate": "1.9967e-04",
+ "loss": 1.0197,
+ "slid_loss": 1.0414,
+ "step": 693,
+ "time": 13.47
+ },
+ {
+ "epoch": 0.67,
+ "learning_rate": "1.9966e-04",
+ "loss": 0.9544,
+ "slid_loss": 1.0416,
+ "step": 694,
+ "time": 13.76
+ },
+ {
+ "epoch": 0.67,
+ "learning_rate": "1.9966e-04",
+ "loss": 1.0017,
+ "slid_loss": 1.0402,
+ "step": 695,
+ "time": 11.28
+ },
+ {
+ "epoch": 0.67,
+ "learning_rate": "1.9966e-04",
+ "loss": 1.0345,
+ "slid_loss": 1.0398,
+ "step": 696,
+ "time": 12.56
+ },
+ {
+ "epoch": 0.67,
+ "learning_rate": "1.9965e-04",
+ "loss": 1.0529,
+ "slid_loss": 1.0409,
+ "step": 697,
+ "time": 12.28
+ },
+ {
+ "epoch": 0.67,
+ "learning_rate": "1.9965e-04",
+ "loss": 0.9044,
+ "slid_loss": 1.0399,
+ "step": 698,
+ "time": 12.36
+ },
+ {
+ "epoch": 0.67,
+ "learning_rate": "1.9964e-04",
+ "loss": 1.0369,
+ "slid_loss": 1.0397,
+ "step": 699,
+ "time": 12.27
+ },
+ {
+ "epoch": 0.67,
+ "learning_rate": "1.9964e-04",
+ "loss": 1.0728,
+ "slid_loss": 1.0399,
+ "step": 700,
+ "time": 13.69
+ },
+ {
+ "epoch": 0.67,
+ "learning_rate": "1.9964e-04",
+ "loss": 0.9856,
+ "slid_loss": 1.0385,
+ "step": 701,
+ "time": 13.58
+ },
+ {
+ "epoch": 0.67,
+ "learning_rate": "1.9963e-04",
+ "loss": 0.9269,
+ "slid_loss": 1.0372,
+ "step": 702,
+ "time": 11.8
+ },
+ {
+ "epoch": 0.68,
+ "learning_rate": "1.9963e-04",
+ "loss": 0.9148,
+ "slid_loss": 1.0354,
+ "step": 703,
+ "time": 12.84
+ },
+ {
+ "epoch": 0.68,
+ "learning_rate": "1.9962e-04",
+ "loss": 1.0185,
+ "slid_loss": 1.035,
+ "step": 704,
+ "time": 11.28
+ },
+ {
+ "epoch": 0.68,
+ "learning_rate": "1.9962e-04",
+ "loss": 1.0797,
+ "slid_loss": 1.0353,
+ "step": 705,
+ "time": 11.63
+ },
+ {
+ "epoch": 0.68,
+ "learning_rate": "1.9962e-04",
+ "loss": 1.0627,
+ "slid_loss": 1.0357,
+ "step": 706,
+ "time": 13.45
+ },
+ {
+ "epoch": 0.68,
+ "learning_rate": "1.9961e-04",
+ "loss": 1.0362,
+ "slid_loss": 1.0359,
+ "step": 707,
+ "time": 12.82
+ },
+ {
+ "epoch": 0.68,
+ "learning_rate": "1.9961e-04",
+ "loss": 1.018,
+ "slid_loss": 1.035,
+ "step": 708,
+ "time": 13.81
+ },
+ {
+ "epoch": 0.68,
+ "learning_rate": "1.9960e-04",
+ "loss": 0.9747,
+ "slid_loss": 1.0332,
+ "step": 709,
+ "time": 13.32
+ },
+ {
+ "epoch": 0.68,
+ "learning_rate": "1.9960e-04",
+ "loss": 1.1148,
+ "slid_loss": 1.034,
+ "step": 710,
+ "time": 13.54
+ },
+ {
+ "epoch": 0.68,
+ "learning_rate": "1.9959e-04",
+ "loss": 0.9513,
+ "slid_loss": 1.0324,
+ "step": 711,
+ "time": 14.44
+ },
+ {
+ "epoch": 0.68,
+ "learning_rate": "1.9959e-04",
+ "loss": 1.0239,
+ "slid_loss": 1.0322,
+ "step": 712,
+ "time": 12.81
+ },
+ {
+ "epoch": 0.68,
+ "learning_rate": "1.9959e-04",
+ "loss": 0.9835,
+ "slid_loss": 1.0317,
+ "step": 713,
+ "time": 13.59
+ },
+ {
+ "epoch": 0.69,
+ "learning_rate": "1.9958e-04",
+ "loss": 1.0818,
+ "slid_loss": 1.0326,
+ "step": 714,
+ "time": 13.07
+ },
+ {
+ "epoch": 0.69,
+ "learning_rate": "1.9958e-04",
+ "loss": 1.0648,
+ "slid_loss": 1.0328,
+ "step": 715,
+ "time": 13.87
+ },
+ {
+ "epoch": 0.69,
+ "learning_rate": "1.9957e-04",
+ "loss": 1.0765,
+ "slid_loss": 1.0337,
+ "step": 716,
+ "time": 12.73
+ },
+ {
+ "epoch": 0.69,
+ "learning_rate": "1.9957e-04",
+ "loss": 1.1218,
+ "slid_loss": 1.0348,
+ "step": 717,
+ "time": 11.81
+ },
+ {
+ "epoch": 0.69,
+ "learning_rate": "1.9956e-04",
+ "loss": 1.0908,
+ "slid_loss": 1.0353,
+ "step": 718,
+ "time": 13.83
+ },
+ {
+ "epoch": 0.69,
+ "learning_rate": "1.9956e-04",
+ "loss": 0.9488,
+ "slid_loss": 1.0343,
+ "step": 719,
+ "time": 13.42
+ },
+ {
+ "epoch": 0.69,
+ "learning_rate": "1.9956e-04",
+ "loss": 1.0339,
+ "slid_loss": 1.0345,
+ "step": 720,
+ "time": 13.79
+ },
+ {
+ "epoch": 0.69,
+ "learning_rate": "1.9955e-04",
+ "loss": 0.9137,
+ "slid_loss": 1.0325,
+ "step": 721,
+ "time": 11.54
+ },
+ {
+ "epoch": 0.69,
+ "learning_rate": "1.9955e-04",
+ "loss": 1.0983,
+ "slid_loss": 1.0328,
+ "step": 722,
+ "time": 13.73
+ },
+ {
+ "epoch": 0.69,
+ "learning_rate": "1.9954e-04",
+ "loss": 0.9561,
+ "slid_loss": 1.0324,
+ "step": 723,
+ "time": 12.89
+ },
+ {
+ "epoch": 0.7,
+ "learning_rate": "1.9954e-04",
+ "loss": 0.9089,
+ "slid_loss": 1.032,
+ "step": 724,
+ "time": 13.27
+ },
+ {
+ "epoch": 0.7,
+ "learning_rate": "1.9953e-04",
+ "loss": 0.9925,
+ "slid_loss": 1.0311,
+ "step": 725,
+ "time": 13.72
+ },
+ {
+ "epoch": 0.7,
+ "learning_rate": "1.9953e-04",
+ "loss": 1.0612,
+ "slid_loss": 1.0317,
+ "step": 726,
+ "time": 13.2
+ },
+ {
+ "epoch": 0.7,
+ "learning_rate": "1.9952e-04",
+ "loss": 0.9549,
+ "slid_loss": 1.0311,
+ "step": 727,
+ "time": 13.37
+ },
+ {
+ "epoch": 0.7,
+ "learning_rate": "1.9952e-04",
+ "loss": 1.0286,
+ "slid_loss": 1.0313,
+ "step": 728,
+ "time": 12.05
+ },
+ {
+ "epoch": 0.7,
+ "learning_rate": "1.9951e-04",
+ "loss": 1.0648,
+ "slid_loss": 1.0314,
+ "step": 729,
+ "time": 12.0
+ },
+ {
+ "epoch": 0.7,
+ "learning_rate": "1.9951e-04",
+ "loss": 1.0779,
+ "slid_loss": 1.0307,
+ "step": 730,
+ "time": 14.26
+ },
+ {
+ "epoch": 0.7,
+ "learning_rate": "1.9950e-04",
+ "loss": 1.0435,
+ "slid_loss": 1.0318,
+ "step": 731,
+ "time": 13.54
+ },
+ {
+ "epoch": 0.7,
+ "learning_rate": "1.9950e-04",
+ "loss": 1.0903,
+ "slid_loss": 1.0317,
+ "step": 732,
+ "time": 13.62
+ },
+ {
+ "epoch": 0.7,
+ "learning_rate": "1.9950e-04",
+ "loss": 1.0505,
+ "slid_loss": 1.0316,
+ "step": 733,
+ "time": 13.11
+ },
+ {
+ "epoch": 0.71,
+ "learning_rate": "1.9949e-04",
+ "loss": 0.9809,
+ "slid_loss": 1.0319,
+ "step": 734,
+ "time": 12.96
+ },
+ {
+ "epoch": 0.71,
+ "learning_rate": "1.9949e-04",
+ "loss": 0.9029,
+ "slid_loss": 1.0299,
+ "step": 735,
+ "time": 13.84
+ },
+ {
+ "epoch": 0.71,
+ "learning_rate": "1.9948e-04",
+ "loss": 1.0465,
+ "slid_loss": 1.0294,
+ "step": 736,
+ "time": 13.08
+ },
+ {
+ "epoch": 0.71,
+ "learning_rate": "1.9948e-04",
+ "loss": 1.0225,
+ "slid_loss": 1.0298,
+ "step": 737,
+ "time": 13.65
+ },
+ {
+ "epoch": 0.71,
+ "learning_rate": "1.9947e-04",
+ "loss": 1.0417,
+ "slid_loss": 1.0291,
+ "step": 738,
+ "time": 15.45
+ },
+ {
+ "epoch": 0.71,
+ "learning_rate": "1.9947e-04",
+ "loss": 1.0021,
+ "slid_loss": 1.0288,
+ "step": 739,
+ "time": 13.33
+ },
+ {
+ "epoch": 0.71,
+ "learning_rate": "1.9946e-04",
+ "loss": 0.8816,
+ "slid_loss": 1.0274,
+ "step": 740,
+ "time": 11.75
+ },
+ {
+ "epoch": 0.71,
+ "learning_rate": "1.9946e-04",
+ "loss": 1.1319,
+ "slid_loss": 1.0278,
+ "step": 741,
+ "time": 13.01
+ },
+ {
+ "epoch": 0.71,
+ "learning_rate": "1.9945e-04",
+ "loss": 1.0488,
+ "slid_loss": 1.0271,
+ "step": 742,
+ "time": 10.85
+ },
+ {
+ "epoch": 0.71,
+ "learning_rate": "1.9945e-04",
+ "loss": 1.0818,
+ "slid_loss": 1.0266,
+ "step": 743,
+ "time": 12.4
+ },
+ {
+ "epoch": 0.71,
+ "learning_rate": "1.9944e-04",
+ "loss": 1.0643,
+ "slid_loss": 1.0265,
+ "step": 744,
+ "time": 13.14
+ },
+ {
+ "epoch": 0.72,
+ "learning_rate": "1.9944e-04",
+ "loss": 1.0599,
+ "slid_loss": 1.0268,
+ "step": 745,
+ "time": 13.99
+ },
+ {
+ "epoch": 0.72,
+ "learning_rate": "1.9943e-04",
+ "loss": 1.0051,
+ "slid_loss": 1.0272,
+ "step": 746,
+ "time": 13.3
+ },
+ {
+ "epoch": 0.72,
+ "learning_rate": "1.9943e-04",
+ "loss": 1.0436,
+ "slid_loss": 1.0277,
+ "step": 747,
+ "time": 14.14
+ },
+ {
+ "epoch": 0.72,
+ "learning_rate": "1.9942e-04",
+ "loss": 1.0075,
+ "slid_loss": 1.0269,
+ "step": 748,
+ "time": 12.16
+ },
+ {
+ "epoch": 0.72,
+ "learning_rate": "1.9942e-04",
+ "loss": 0.994,
+ "slid_loss": 1.0259,
+ "step": 749,
+ "time": 13.84
+ },
+ {
+ "epoch": 0.72,
+ "learning_rate": "1.9941e-04",
+ "loss": 1.0013,
+ "slid_loss": 1.0251,
+ "step": 750,
+ "time": 13.67
+ },
+ {
+ "epoch": 0.72,
+ "learning_rate": "1.9941e-04",
+ "loss": 0.8608,
+ "slid_loss": 1.0231,
+ "step": 751,
+ "time": 12.81
+ },
+ {
+ "epoch": 0.72,
+ "learning_rate": "1.9940e-04",
+ "loss": 0.8892,
+ "slid_loss": 1.0223,
+ "step": 752,
+ "time": 12.83
+ },
+ {
+ "epoch": 0.72,
+ "learning_rate": "1.9940e-04",
+ "loss": 1.0035,
+ "slid_loss": 1.0214,
+ "step": 753,
+ "time": 13.43
+ },
+ {
+ "epoch": 0.72,
+ "learning_rate": "1.9939e-04",
+ "loss": 0.9436,
+ "slid_loss": 1.0204,
+ "step": 754,
+ "time": 12.22
+ },
+ {
+ "epoch": 0.73,
+ "learning_rate": "1.9939e-04",
+ "loss": 1.0129,
+ "slid_loss": 1.0205,
+ "step": 755,
+ "time": 13.48
+ },
+ {
+ "epoch": 0.73,
+ "learning_rate": "1.9938e-04",
+ "loss": 0.9239,
+ "slid_loss": 1.0197,
+ "step": 756,
+ "time": 13.14
+ },
+ {
+ "epoch": 0.73,
+ "learning_rate": "1.9937e-04",
+ "loss": 0.9885,
+ "slid_loss": 1.0196,
+ "step": 757,
+ "time": 13.44
+ },
+ {
+ "epoch": 0.73,
+ "learning_rate": "1.9937e-04",
+ "loss": 0.9389,
+ "slid_loss": 1.0188,
+ "step": 758,
+ "time": 13.37
+ },
+ {
+ "epoch": 0.73,
+ "learning_rate": "1.9936e-04",
+ "loss": 1.0808,
+ "slid_loss": 1.018,
+ "step": 759,
+ "time": 11.7
+ },
+ {
+ "epoch": 0.73,
+ "learning_rate": "1.9936e-04",
+ "loss": 0.9554,
+ "slid_loss": 1.0172,
+ "step": 760,
+ "time": 13.8
+ },
+ {
+ "epoch": 0.73,
+ "learning_rate": "1.9935e-04",
+ "loss": 1.0492,
+ "slid_loss": 1.0175,
+ "step": 761,
+ "time": 11.93
+ },
+ {
+ "epoch": 0.73,
+ "learning_rate": "1.9935e-04",
+ "loss": 0.9557,
+ "slid_loss": 1.0171,
+ "step": 762,
+ "time": 13.57
+ },
+ {
+ "epoch": 0.73,
+ "learning_rate": "1.9934e-04",
+ "loss": 1.0079,
+ "slid_loss": 1.0167,
+ "step": 763,
+ "time": 13.2
+ },
+ {
+ "epoch": 0.73,
+ "learning_rate": "1.9934e-04",
+ "loss": 1.0476,
+ "slid_loss": 1.0158,
+ "step": 764,
+ "time": 14.08
+ },
+ {
+ "epoch": 0.73,
+ "learning_rate": "1.9933e-04",
+ "loss": 1.0188,
+ "slid_loss": 1.0159,
+ "step": 765,
+ "time": 12.29
+ },
+ {
+ "epoch": 0.74,
+ "learning_rate": "1.9933e-04",
+ "loss": 1.0088,
+ "slid_loss": 1.0163,
+ "step": 766,
+ "time": 13.0
+ },
+ {
+ "epoch": 0.74,
+ "learning_rate": "1.9932e-04",
+ "loss": 0.9589,
+ "slid_loss": 1.0165,
+ "step": 767,
+ "time": 14.26
+ },
+ {
+ "epoch": 0.74,
+ "learning_rate": "1.9932e-04",
+ "loss": 1.0309,
+ "slid_loss": 1.0169,
+ "step": 768,
+ "time": 13.78
+ },
+ {
+ "epoch": 0.74,
+ "learning_rate": "1.9931e-04",
+ "loss": 1.079,
+ "slid_loss": 1.0169,
+ "step": 769,
+ "time": 13.73
+ },
+ {
+ "epoch": 0.74,
+ "learning_rate": "1.9930e-04",
+ "loss": 0.9621,
+ "slid_loss": 1.0173,
+ "step": 770,
+ "time": 12.89
+ },
+ {
+ "epoch": 0.74,
+ "learning_rate": "1.9930e-04",
+ "loss": 0.8697,
+ "slid_loss": 1.015,
+ "step": 771,
+ "time": 12.9
+ },
+ {
+ "epoch": 0.74,
+ "learning_rate": "1.9929e-04",
+ "loss": 0.9752,
+ "slid_loss": 1.0137,
+ "step": 772,
+ "time": 13.62
+ },
+ {
+ "epoch": 0.74,
+ "learning_rate": "1.9929e-04",
+ "loss": 1.1629,
+ "slid_loss": 1.0146,
+ "step": 773,
+ "time": 13.2
+ },
+ {
+ "epoch": 0.74,
+ "learning_rate": "1.9928e-04",
+ "loss": 0.9809,
+ "slid_loss": 1.0151,
+ "step": 774,
+ "time": 13.23
+ },
+ {
+ "epoch": 0.74,
+ "learning_rate": "1.9928e-04",
+ "loss": 1.0596,
+ "slid_loss": 1.0155,
+ "step": 775,
+ "time": 12.28
+ },
+ {
+ "epoch": 0.75,
+ "learning_rate": "1.9927e-04",
+ "loss": 1.0694,
+ "slid_loss": 1.0166,
+ "step": 776,
+ "time": 13.15
+ },
+ {
+ "epoch": 0.75,
+ "learning_rate": "1.9926e-04",
+ "loss": 1.0207,
+ "slid_loss": 1.0169,
+ "step": 777,
+ "time": 12.85
+ },
+ {
+ "epoch": 0.75,
+ "learning_rate": "1.9926e-04",
+ "loss": 1.0557,
+ "slid_loss": 1.016,
+ "step": 778,
+ "time": 13.39
+ },
+ {
+ "epoch": 0.75,
+ "learning_rate": "1.9925e-04",
+ "loss": 1.061,
+ "slid_loss": 1.0165,
+ "step": 779,
+ "time": 11.34
+ },
+ {
+ "epoch": 0.75,
+ "learning_rate": "1.9925e-04",
+ "loss": 1.1373,
+ "slid_loss": 1.0176,
+ "step": 780,
+ "time": 13.43
+ },
+ {
+ "epoch": 0.75,
+ "learning_rate": "1.9924e-04",
+ "loss": 1.0246,
+ "slid_loss": 1.0176,
+ "step": 781,
+ "time": 12.77
+ },
+ {
+ "epoch": 0.75,
+ "learning_rate": "1.9924e-04",
+ "loss": 0.9139,
+ "slid_loss": 1.018,
+ "step": 782,
+ "time": 12.2
+ },
+ {
+ "epoch": 0.75,
+ "learning_rate": "1.9923e-04",
+ "loss": 1.0051,
+ "slid_loss": 1.0178,
+ "step": 783,
+ "time": 13.71
+ },
+ {
+ "epoch": 0.75,
+ "learning_rate": "1.9922e-04",
+ "loss": 0.9709,
+ "slid_loss": 1.0166,
+ "step": 784,
+ "time": 13.45
+ },
+ {
+ "epoch": 0.75,
+ "learning_rate": "1.9922e-04",
+ "loss": 1.0337,
+ "slid_loss": 1.0154,
+ "step": 785,
+ "time": 12.71
+ },
+ {
+ "epoch": 0.76,
+ "learning_rate": "1.9921e-04",
+ "loss": 1.0183,
+ "slid_loss": 1.0153,
+ "step": 786,
+ "time": 12.27
+ },
+ {
+ "epoch": 0.76,
+ "learning_rate": "1.9921e-04",
+ "loss": 1.0356,
+ "slid_loss": 1.0151,
+ "step": 787,
+ "time": 13.63
+ },
+ {
+ "epoch": 0.76,
+ "learning_rate": "1.9920e-04",
+ "loss": 0.9522,
+ "slid_loss": 1.0146,
+ "step": 788,
+ "time": 12.64
+ },
+ {
+ "epoch": 0.76,
+ "learning_rate": "1.9919e-04",
+ "loss": 0.9984,
+ "slid_loss": 1.0137,
+ "step": 789,
+ "time": 12.9
+ },
+ {
+ "epoch": 0.76,
+ "learning_rate": "1.9919e-04",
+ "loss": 1.0674,
+ "slid_loss": 1.0136,
+ "step": 790,
+ "time": 11.98
+ },
+ {
+ "epoch": 0.76,
+ "learning_rate": "1.9918e-04",
+ "loss": 1.1005,
+ "slid_loss": 1.0139,
+ "step": 791,
+ "time": 13.38
+ },
+ {
+ "epoch": 0.76,
+ "learning_rate": "1.9918e-04",
+ "loss": 1.0685,
+ "slid_loss": 1.0144,
+ "step": 792,
+ "time": 13.48
+ },
+ {
+ "epoch": 0.76,
+ "learning_rate": "1.9917e-04",
+ "loss": 0.9861,
+ "slid_loss": 1.0141,
+ "step": 793,
+ "time": 12.81
+ },
+ {
+ "epoch": 0.76,
+ "learning_rate": "1.9916e-04",
+ "loss": 1.0828,
+ "slid_loss": 1.0154,
+ "step": 794,
+ "time": 13.44
+ },
+ {
+ "epoch": 0.76,
+ "learning_rate": "1.9916e-04",
+ "loss": 0.9249,
+ "slid_loss": 1.0146,
+ "step": 795,
+ "time": 13.54
+ },
+ {
+ "epoch": 0.76,
+ "learning_rate": "1.9915e-04",
+ "loss": 0.988,
+ "slid_loss": 1.0141,
+ "step": 796,
+ "time": 14.08
+ },
+ {
+ "epoch": 0.77,
+ "learning_rate": "1.9915e-04",
+ "loss": 1.0391,
+ "slid_loss": 1.014,
+ "step": 797,
+ "time": 13.39
+ },
+ {
+ "epoch": 0.77,
+ "learning_rate": "1.9914e-04",
+ "loss": 1.0448,
+ "slid_loss": 1.0154,
+ "step": 798,
+ "time": 11.38
+ },
+ {
+ "epoch": 0.77,
+ "learning_rate": "1.9913e-04",
+ "loss": 1.0193,
+ "slid_loss": 1.0152,
+ "step": 799,
+ "time": 12.58
+ },
+ {
+ "epoch": 0.77,
+ "learning_rate": "1.9913e-04",
+ "loss": 0.9802,
+ "slid_loss": 1.0143,
+ "step": 800,
+ "time": 11.92
+ },
+ {
+ "epoch": 0.77,
+ "learning_rate": "1.9912e-04",
+ "loss": 0.9691,
+ "slid_loss": 1.0141,
+ "step": 801,
+ "time": 13.18
+ },
+ {
+ "epoch": 0.77,
+ "learning_rate": "1.9911e-04",
+ "loss": 1.178,
+ "slid_loss": 1.0166,
+ "step": 802,
+ "time": 12.85
+ },
+ {
+ "epoch": 0.77,
+ "learning_rate": "1.9911e-04",
+ "loss": 0.9051,
+ "slid_loss": 1.0165,
+ "step": 803,
+ "time": 12.87
+ },
+ {
+ "epoch": 0.77,
+ "learning_rate": "1.9910e-04",
+ "loss": 0.8386,
+ "slid_loss": 1.0147,
+ "step": 804,
+ "time": 14.14
+ },
+ {
+ "epoch": 0.77,
+ "learning_rate": "1.9910e-04",
+ "loss": 0.9707,
+ "slid_loss": 1.0137,
+ "step": 805,
+ "time": 13.6
+ },
+ {
+ "epoch": 0.77,
+ "learning_rate": "1.9909e-04",
+ "loss": 1.0228,
+ "slid_loss": 1.0133,
+ "step": 806,
+ "time": 11.17
+ },
+ {
+ "epoch": 0.78,
+ "learning_rate": "1.9908e-04",
+ "loss": 0.9951,
+ "slid_loss": 1.0128,
+ "step": 807,
+ "time": 12.94
+ },
+ {
+ "epoch": 0.78,
+ "learning_rate": "1.9908e-04",
+ "loss": 0.9642,
+ "slid_loss": 1.0123,
+ "step": 808,
+ "time": 12.38
+ },
+ {
+ "epoch": 0.78,
+ "learning_rate": "1.9907e-04",
+ "loss": 0.9216,
+ "slid_loss": 1.0118,
+ "step": 809,
+ "time": 13.01
+ },
+ {
+ "epoch": 0.78,
+ "learning_rate": "1.9906e-04",
+ "loss": 0.9235,
+ "slid_loss": 1.0099,
+ "step": 810,
+ "time": 13.19
+ },
+ {
+ "epoch": 0.78,
+ "learning_rate": "1.9906e-04",
+ "loss": 1.06,
+ "slid_loss": 1.0109,
+ "step": 811,
+ "time": 13.56
+ },
+ {
+ "epoch": 0.78,
+ "learning_rate": "1.9905e-04",
+ "loss": 1.0032,
+ "slid_loss": 1.0107,
+ "step": 812,
+ "time": 14.05
+ },
+ {
+ "epoch": 0.78,
+ "learning_rate": "1.9904e-04",
+ "loss": 1.0168,
+ "slid_loss": 1.0111,
+ "step": 813,
+ "time": 13.28
+ },
+ {
+ "epoch": 0.78,
+ "learning_rate": "1.9904e-04",
+ "loss": 1.0079,
+ "slid_loss": 1.0103,
+ "step": 814,
+ "time": 13.32
+ },
+ {
+ "epoch": 0.78,
+ "learning_rate": "1.9903e-04",
+ "loss": 0.9326,
+ "slid_loss": 1.009,
+ "step": 815,
+ "time": 11.06
+ },
+ {
+ "epoch": 0.78,
+ "learning_rate": "1.9902e-04",
+ "loss": 1.0019,
+ "slid_loss": 1.0083,
+ "step": 816,
+ "time": 11.96
+ },
+ {
+ "epoch": 0.78,
+ "learning_rate": "1.9902e-04",
+ "loss": 0.9545,
+ "slid_loss": 1.0066,
+ "step": 817,
+ "time": 13.87
+ },
+ {
+ "epoch": 0.79,
+ "learning_rate": "1.9901e-04",
+ "loss": 1.0344,
+ "slid_loss": 1.006,
+ "step": 818,
+ "time": 13.38
+ },
+ {
+ "epoch": 0.79,
+ "learning_rate": "1.9900e-04",
+ "loss": 1.0008,
+ "slid_loss": 1.0066,
+ "step": 819,
+ "time": 12.88
+ },
+ {
+ "epoch": 0.79,
+ "learning_rate": "1.9900e-04",
+ "loss": 0.9723,
+ "slid_loss": 1.0059,
+ "step": 820,
+ "time": 13.79
+ },
+ {
+ "epoch": 0.79,
+ "learning_rate": "1.9899e-04",
+ "loss": 1.0433,
+ "slid_loss": 1.0072,
+ "step": 821,
+ "time": 13.25
+ },
+ {
+ "epoch": 0.79,
+ "learning_rate": "1.9898e-04",
+ "loss": 1.0467,
+ "slid_loss": 1.0067,
+ "step": 822,
+ "time": 13.1
+ },
+ {
+ "epoch": 0.79,
+ "learning_rate": "1.9898e-04",
+ "loss": 1.008,
+ "slid_loss": 1.0072,
+ "step": 823,
+ "time": 13.24
+ },
+ {
+ "epoch": 0.79,
+ "learning_rate": "1.9897e-04",
+ "loss": 1.0883,
+ "slid_loss": 1.009,
+ "step": 824,
+ "time": 13.23
+ },
+ {
+ "epoch": 0.79,
+ "learning_rate": "1.9896e-04",
+ "loss": 1.0326,
+ "slid_loss": 1.0094,
+ "step": 825,
+ "time": 13.75
+ },
+ {
+ "epoch": 0.79,
+ "learning_rate": "1.9896e-04",
+ "loss": 1.0183,
+ "slid_loss": 1.009,
+ "step": 826,
+ "time": 12.8
+ },
+ {
+ "epoch": 0.79,
+ "learning_rate": "1.9895e-04",
+ "loss": 0.9839,
+ "slid_loss": 1.0093,
+ "step": 827,
+ "time": 13.46
+ },
+ {
+ "epoch": 0.8,
+ "learning_rate": "1.9894e-04",
+ "loss": 1.0894,
+ "slid_loss": 1.0099,
+ "step": 828,
+ "time": 12.91
+ },
+ {
+ "epoch": 0.8,
+ "learning_rate": "1.9894e-04",
+ "loss": 1.0159,
+ "slid_loss": 1.0094,
+ "step": 829,
+ "time": 12.81
+ },
+ {
+ "epoch": 0.8,
+ "learning_rate": "1.9893e-04",
+ "loss": 0.9974,
+ "slid_loss": 1.0086,
+ "step": 830,
+ "time": 13.63
+ },
+ {
+ "epoch": 0.8,
+ "learning_rate": "1.9892e-04",
+ "loss": 1.0866,
+ "slid_loss": 1.009,
+ "step": 831,
+ "time": 13.59
+ },
+ {
+ "epoch": 0.8,
+ "learning_rate": "1.9892e-04",
+ "loss": 0.9809,
+ "slid_loss": 1.0079,
+ "step": 832,
+ "time": 13.33
+ },
+ {
+ "epoch": 0.8,
+ "learning_rate": "1.9891e-04",
+ "loss": 0.9967,
+ "slid_loss": 1.0074,
+ "step": 833,
+ "time": 13.67
+ },
+ {
+ "epoch": 0.8,
+ "learning_rate": "1.9890e-04",
+ "loss": 1.0102,
+ "slid_loss": 1.0077,
+ "step": 834,
+ "time": 12.24
+ },
+ {
+ "epoch": 0.8,
+ "learning_rate": "1.9890e-04",
+ "loss": 1.0953,
+ "slid_loss": 1.0096,
+ "step": 835,
+ "time": 13.63
+ },
+ {
+ "epoch": 0.8,
+ "learning_rate": "1.9889e-04",
+ "loss": 1.0098,
+ "slid_loss": 1.0093,
+ "step": 836,
+ "time": 13.33
+ },
+ {
+ "epoch": 0.8,
+ "learning_rate": "1.9888e-04",
+ "loss": 0.9716,
+ "slid_loss": 1.0087,
+ "step": 837,
+ "time": 13.79
+ },
+ {
+ "epoch": 0.8,
+ "learning_rate": "1.9887e-04",
+ "loss": 1.004,
+ "slid_loss": 1.0084,
+ "step": 838,
+ "time": 14.06
+ },
+ {
+ "epoch": 0.81,
+ "learning_rate": "1.9887e-04",
+ "loss": 0.9311,
+ "slid_loss": 1.0077,
+ "step": 839,
+ "time": 13.39
+ },
+ {
+ "epoch": 0.81,
+ "learning_rate": "1.9886e-04",
+ "loss": 1.1044,
+ "slid_loss": 1.0099,
+ "step": 840,
+ "time": 14.15
+ },
+ {
+ "epoch": 0.81,
+ "learning_rate": "1.9885e-04",
+ "loss": 1.0054,
+ "slid_loss": 1.0086,
+ "step": 841,
+ "time": 12.35
+ },
+ {
+ "epoch": 0.81,
+ "learning_rate": "1.9885e-04",
+ "loss": 1.0644,
+ "slid_loss": 1.0088,
+ "step": 842,
+ "time": 13.15
+ },
+ {
+ "epoch": 0.81,
+ "learning_rate": "1.9884e-04",
+ "loss": 1.0625,
+ "slid_loss": 1.0086,
+ "step": 843,
+ "time": 11.07
+ },
+ {
+ "epoch": 0.81,
+ "learning_rate": "1.9883e-04",
+ "loss": 0.9804,
+ "slid_loss": 1.0077,
+ "step": 844,
+ "time": 13.4
+ },
+ {
+ "epoch": 0.81,
+ "learning_rate": "1.9882e-04",
+ "loss": 1.0813,
+ "slid_loss": 1.008,
+ "step": 845,
+ "time": 13.14
+ },
+ {
+ "epoch": 0.81,
+ "learning_rate": "1.9882e-04",
+ "loss": 1.1213,
+ "slid_loss": 1.0091,
+ "step": 846,
+ "time": 13.86
+ },
+ {
+ "epoch": 0.81,
+ "learning_rate": "1.9881e-04",
+ "loss": 1.0148,
+ "slid_loss": 1.0088,
+ "step": 847,
+ "time": 13.13
+ },
+ {
+ "epoch": 0.81,
+ "learning_rate": "1.9880e-04",
+ "loss": 1.0477,
+ "slid_loss": 1.0092,
+ "step": 848,
+ "time": 11.54
+ },
+ {
+ "epoch": 0.82,
+ "learning_rate": "1.9879e-04",
+ "loss": 0.9842,
+ "slid_loss": 1.0091,
+ "step": 849,
+ "time": 11.95
+ },
+ {
+ "epoch": 0.82,
+ "learning_rate": "1.9879e-04",
+ "loss": 1.0497,
+ "slid_loss": 1.0096,
+ "step": 850,
+ "time": 11.62
+ },
+ {
+ "epoch": 0.82,
+ "learning_rate": "1.9878e-04",
+ "loss": 0.9359,
+ "slid_loss": 1.0104,
+ "step": 851,
+ "time": 13.4
+ },
+ {
+ "epoch": 0.82,
+ "learning_rate": "1.9877e-04",
+ "loss": 1.1212,
+ "slid_loss": 1.0127,
+ "step": 852,
+ "time": 14.07
+ },
+ {
+ "epoch": 0.82,
+ "learning_rate": "1.9877e-04",
+ "loss": 0.9765,
+ "slid_loss": 1.0124,
+ "step": 853,
+ "time": 11.32
+ },
+ {
+ "epoch": 0.82,
+ "learning_rate": "1.9876e-04",
+ "loss": 0.9339,
+ "slid_loss": 1.0123,
+ "step": 854,
+ "time": 13.27
+ },
+ {
+ "epoch": 0.82,
+ "learning_rate": "1.9875e-04",
+ "loss": 1.0383,
+ "slid_loss": 1.0126,
+ "step": 855,
+ "time": 13.74
+ },
+ {
+ "epoch": 0.82,
+ "learning_rate": "1.9874e-04",
+ "loss": 0.9501,
+ "slid_loss": 1.0128,
+ "step": 856,
+ "time": 13.37
+ },
+ {
+ "epoch": 0.82,
+ "learning_rate": "1.9874e-04",
+ "loss": 0.9437,
+ "slid_loss": 1.0124,
+ "step": 857,
+ "time": 10.72
+ },
+ {
+ "epoch": 0.82,
+ "learning_rate": "1.9873e-04",
+ "loss": 0.9241,
+ "slid_loss": 1.0122,
+ "step": 858,
+ "time": 11.27
+ },
+ {
+ "epoch": 0.83,
+ "learning_rate": "1.9872e-04",
+ "loss": 0.9676,
+ "slid_loss": 1.0111,
+ "step": 859,
+ "time": 13.36
+ },
+ {
+ "epoch": 0.83,
+ "learning_rate": "1.9871e-04",
+ "loss": 1.0482,
+ "slid_loss": 1.012,
+ "step": 860,
+ "time": 12.76
+ },
+ {
+ "epoch": 0.83,
+ "learning_rate": "1.9871e-04",
+ "loss": 0.9695,
+ "slid_loss": 1.0112,
+ "step": 861,
+ "time": 13.16
+ },
+ {
+ "epoch": 0.83,
+ "learning_rate": "1.9870e-04",
+ "loss": 0.9381,
+ "slid_loss": 1.0111,
+ "step": 862,
+ "time": 11.15
+ },
+ {
+ "epoch": 0.83,
+ "learning_rate": "1.9869e-04",
+ "loss": 0.9073,
+ "slid_loss": 1.0101,
+ "step": 863,
+ "time": 11.9
+ },
+ {
+ "epoch": 0.83,
+ "learning_rate": "1.9868e-04",
+ "loss": 0.9883,
+ "slid_loss": 1.0095,
+ "step": 864,
+ "time": 13.47
+ },
+ {
+ "epoch": 0.83,
+ "learning_rate": "1.9868e-04",
+ "loss": 1.0486,
+ "slid_loss": 1.0098,
+ "step": 865,
+ "time": 13.33
+ },
+ {
+ "epoch": 0.83,
+ "learning_rate": "1.9867e-04",
+ "loss": 1.0283,
+ "slid_loss": 1.01,
+ "step": 866,
+ "time": 13.65
+ },
+ {
+ "epoch": 0.83,
+ "learning_rate": "1.9866e-04",
+ "loss": 0.8641,
+ "slid_loss": 1.009,
+ "step": 867,
+ "time": 14.34
+ },
+ {
+ "epoch": 0.83,
+ "learning_rate": "1.9865e-04",
+ "loss": 1.0782,
+ "slid_loss": 1.0095,
+ "step": 868,
+ "time": 14.0
+ },
+ {
+ "epoch": 0.83,
+ "learning_rate": "1.9864e-04",
+ "loss": 0.9565,
+ "slid_loss": 1.0083,
+ "step": 869,
+ "time": 13.45
+ },
+ {
+ "epoch": 0.84,
+ "learning_rate": "1.9864e-04",
+ "loss": 1.0361,
+ "slid_loss": 1.009,
+ "step": 870,
+ "time": 11.26
+ },
+ {
+ "epoch": 0.84,
+ "learning_rate": "1.9863e-04",
+ "loss": 0.9371,
+ "slid_loss": 1.0097,
+ "step": 871,
+ "time": 11.64
+ },
+ {
+ "epoch": 0.84,
+ "learning_rate": "1.9862e-04",
+ "loss": 0.977,
+ "slid_loss": 1.0097,
+ "step": 872,
+ "time": 13.16
+ },
+ {
+ "epoch": 0.84,
+ "learning_rate": "1.9861e-04",
+ "loss": 0.9393,
+ "slid_loss": 1.0075,
+ "step": 873,
+ "time": 11.67
+ },
+ {
+ "epoch": 0.84,
+ "learning_rate": "1.9861e-04",
+ "loss": 1.0374,
+ "slid_loss": 1.008,
+ "step": 874,
+ "time": 13.16
+ },
+ {
+ "epoch": 0.84,
+ "learning_rate": "1.9860e-04",
+ "loss": 0.8848,
+ "slid_loss": 1.0063,
+ "step": 875,
+ "time": 12.25
+ },
+ {
+ "epoch": 0.84,
+ "learning_rate": "1.9859e-04",
+ "loss": 1.0191,
+ "slid_loss": 1.0058,
+ "step": 876,
+ "time": 13.55
+ },
+ {
+ "epoch": 0.84,
+ "learning_rate": "1.9858e-04",
+ "loss": 1.0166,
+ "slid_loss": 1.0057,
+ "step": 877,
+ "time": 11.8
+ },
+ {
+ "epoch": 0.84,
+ "learning_rate": "1.9857e-04",
+ "loss": 0.9387,
+ "slid_loss": 1.0046,
+ "step": 878,
+ "time": 13.02
+ },
+ {
+ "epoch": 0.84,
+ "learning_rate": "1.9857e-04",
+ "loss": 1.1353,
+ "slid_loss": 1.0053,
+ "step": 879,
+ "time": 13.41
+ },
+ {
+ "epoch": 0.85,
+ "learning_rate": "1.9856e-04",
+ "loss": 0.9761,
+ "slid_loss": 1.0037,
+ "step": 880,
+ "time": 12.28
+ },
+ {
+ "epoch": 0.85,
+ "learning_rate": "1.9855e-04",
+ "loss": 1.0366,
+ "slid_loss": 1.0038,
+ "step": 881,
+ "time": 12.8
+ },
+ {
+ "epoch": 0.85,
+ "learning_rate": "1.9854e-04",
+ "loss": 0.9963,
+ "slid_loss": 1.0046,
+ "step": 882,
+ "time": 13.15
+ },
+ {
+ "epoch": 0.85,
+ "learning_rate": "1.9853e-04",
+ "loss": 0.9585,
+ "slid_loss": 1.0042,
+ "step": 883,
+ "time": 13.67
+ },
+ {
+ "epoch": 0.85,
+ "learning_rate": "1.9853e-04",
+ "loss": 1.0659,
+ "slid_loss": 1.0051,
+ "step": 884,
+ "time": 12.9
+ },
+ {
+ "epoch": 0.85,
+ "learning_rate": "1.9852e-04",
+ "loss": 1.0106,
+ "slid_loss": 1.0049,
+ "step": 885,
+ "time": 13.34
+ },
+ {
+ "epoch": 0.85,
+ "learning_rate": "1.9851e-04",
+ "loss": 1.0823,
+ "slid_loss": 1.0055,
+ "step": 886,
+ "time": 11.08
+ },
+ {
+ "epoch": 0.85,
+ "learning_rate": "1.9850e-04",
+ "loss": 1.0362,
+ "slid_loss": 1.0055,
+ "step": 887,
+ "time": 11.72
+ },
+ {
+ "epoch": 0.85,
+ "learning_rate": "1.9849e-04",
+ "loss": 0.944,
+ "slid_loss": 1.0055,
+ "step": 888,
+ "time": 11.59
+ },
+ {
+ "epoch": 0.85,
+ "learning_rate": "1.9848e-04",
+ "loss": 0.9746,
+ "slid_loss": 1.0052,
+ "step": 889,
+ "time": 11.58
+ },
+ {
+ "epoch": 0.85,
+ "learning_rate": "1.9848e-04",
+ "loss": 0.9764,
+ "slid_loss": 1.0043,
+ "step": 890,
+ "time": 12.61
+ },
+ {
+ "epoch": 0.86,
+ "learning_rate": "1.9847e-04",
+ "loss": 1.0767,
+ "slid_loss": 1.0041,
+ "step": 891,
+ "time": 12.97
+ },
+ {
+ "epoch": 0.86,
+ "learning_rate": "1.9846e-04",
+ "loss": 0.9895,
+ "slid_loss": 1.0033,
+ "step": 892,
+ "time": 13.39
+ },
+ {
+ "epoch": 0.86,
+ "learning_rate": "1.9845e-04",
+ "loss": 1.0639,
+ "slid_loss": 1.0041,
+ "step": 893,
+ "time": 13.13
+ },
+ {
+ "epoch": 0.86,
+ "learning_rate": "1.9844e-04",
+ "loss": 0.971,
+ "slid_loss": 1.0029,
+ "step": 894,
+ "time": 15.02
+ },
+ {
+ "epoch": 0.86,
+ "learning_rate": "1.9844e-04",
+ "loss": 0.9143,
+ "slid_loss": 1.0028,
+ "step": 895,
+ "time": 13.23
+ },
+ {
+ "epoch": 0.86,
+ "learning_rate": "1.9843e-04",
+ "loss": 0.9306,
+ "slid_loss": 1.0023,
+ "step": 896,
+ "time": 13.02
+ },
+ {
+ "epoch": 0.86,
+ "learning_rate": "1.9842e-04",
+ "loss": 0.9732,
+ "slid_loss": 1.0016,
+ "step": 897,
+ "time": 13.58
+ },
+ {
+ "epoch": 0.86,
+ "learning_rate": "1.9841e-04",
+ "loss": 0.8705,
+ "slid_loss": 0.9999,
+ "step": 898,
+ "time": 12.76
+ },
+ {
+ "epoch": 0.86,
+ "learning_rate": "1.9840e-04",
+ "loss": 1.1178,
+ "slid_loss": 1.0008,
+ "step": 899,
+ "time": 13.4
+ },
+ {
+ "epoch": 0.86,
+ "learning_rate": "1.9839e-04",
+ "loss": 1.0061,
+ "slid_loss": 1.0011,
+ "step": 900,
+ "time": 12.55
+ },
+ {
+ "epoch": 0.87,
+ "learning_rate": "1.9838e-04",
+ "loss": 0.9413,
+ "slid_loss": 1.0008,
+ "step": 901,
+ "time": 12.02
+ },
+ {
+ "epoch": 0.87,
+ "learning_rate": "1.9838e-04",
+ "loss": 0.919,
+ "slid_loss": 0.9982,
+ "step": 902,
+ "time": 13.56
+ },
+ {
+ "epoch": 0.87,
+ "learning_rate": "1.9837e-04",
+ "loss": 0.9952,
+ "slid_loss": 0.9991,
+ "step": 903,
+ "time": 13.15
+ },
+ {
+ "epoch": 0.87,
+ "learning_rate": "1.9836e-04",
+ "loss": 0.9613,
+ "slid_loss": 1.0004,
+ "step": 904,
+ "time": 13.41
+ },
+ {
+ "epoch": 0.87,
+ "learning_rate": "1.9835e-04",
+ "loss": 0.8695,
+ "slid_loss": 0.9993,
+ "step": 905,
+ "time": 12.81
+ },
+ {
+ "epoch": 0.87,
+ "learning_rate": "1.9834e-04",
+ "loss": 1.0116,
+ "slid_loss": 0.9992,
+ "step": 906,
+ "time": 14.25
+ },
+ {
+ "epoch": 0.87,
+ "learning_rate": "1.9833e-04",
+ "loss": 0.9132,
+ "slid_loss": 0.9984,
+ "step": 907,
+ "time": 13.97
+ },
+ {
+ "epoch": 0.87,
+ "learning_rate": "1.9833e-04",
+ "loss": 0.9325,
+ "slid_loss": 0.9981,
+ "step": 908,
+ "time": 12.81
+ },
+ {
+ "epoch": 0.87,
+ "learning_rate": "1.9832e-04",
+ "loss": 0.9653,
+ "slid_loss": 0.9985,
+ "step": 909,
+ "time": 13.37
+ },
+ {
+ "epoch": 0.87,
+ "learning_rate": "1.9831e-04",
+ "loss": 0.9469,
+ "slid_loss": 0.9988,
+ "step": 910,
+ "time": 13.21
+ },
+ {
+ "epoch": 0.88,
+ "learning_rate": "1.9830e-04",
+ "loss": 1.0618,
+ "slid_loss": 0.9988,
+ "step": 911,
+ "time": 13.79
+ },
+ {
+ "epoch": 0.88,
+ "learning_rate": "1.9829e-04",
+ "loss": 1.0181,
+ "slid_loss": 0.9989,
+ "step": 912,
+ "time": 13.09
+ },
+ {
+ "epoch": 0.88,
+ "learning_rate": "1.9828e-04",
+ "loss": 0.9328,
+ "slid_loss": 0.9981,
+ "step": 913,
+ "time": 13.3
+ },
+ {
+ "epoch": 0.88,
+ "learning_rate": "1.9827e-04",
+ "loss": 1.0416,
+ "slid_loss": 0.9984,
+ "step": 914,
+ "time": 13.81
+ },
+ {
+ "epoch": 0.88,
+ "learning_rate": "1.9826e-04",
+ "loss": 1.0824,
+ "slid_loss": 0.9999,
+ "step": 915,
+ "time": 13.18
+ },
+ {
+ "epoch": 0.88,
+ "learning_rate": "1.9826e-04",
+ "loss": 1.0186,
+ "slid_loss": 1.0001,
+ "step": 916,
+ "time": 11.26
+ },
+ {
+ "epoch": 0.88,
+ "learning_rate": "1.9825e-04",
+ "loss": 0.9815,
+ "slid_loss": 1.0004,
+ "step": 917,
+ "time": 12.98
+ },
+ {
+ "epoch": 0.88,
+ "learning_rate": "1.9824e-04",
+ "loss": 1.0509,
+ "slid_loss": 1.0005,
+ "step": 918,
+ "time": 13.31
+ },
+ {
+ "epoch": 0.88,
+ "learning_rate": "1.9823e-04",
+ "loss": 1.0622,
+ "slid_loss": 1.0011,
+ "step": 919,
+ "time": 12.84
+ },
+ {
+ "epoch": 0.88,
+ "learning_rate": "1.9822e-04",
+ "loss": 0.9517,
+ "slid_loss": 1.0009,
+ "step": 920,
+ "time": 11.9
+ },
+ {
+ "epoch": 0.88,
+ "learning_rate": "1.9821e-04",
+ "loss": 1.021,
+ "slid_loss": 1.0007,
+ "step": 921,
+ "time": 13.68
+ },
+ {
+ "epoch": 0.89,
+ "learning_rate": "1.9820e-04",
+ "loss": 0.9134,
+ "slid_loss": 0.9994,
+ "step": 922,
+ "time": 12.39
+ },
+ {
+ "epoch": 0.89,
+ "learning_rate": "1.9819e-04",
+ "loss": 0.8823,
+ "slid_loss": 0.9981,
+ "step": 923,
+ "time": 11.68
+ },
+ {
+ "epoch": 0.89,
+ "learning_rate": "1.9818e-04",
+ "loss": 1.0961,
+ "slid_loss": 0.9982,
+ "step": 924,
+ "time": 13.34
+ },
+ {
+ "epoch": 0.89,
+ "learning_rate": "1.9818e-04",
+ "loss": 0.9627,
+ "slid_loss": 0.9975,
+ "step": 925,
+ "time": 14.17
+ },
+ {
+ "epoch": 0.89,
+ "learning_rate": "1.9817e-04",
+ "loss": 0.9509,
+ "slid_loss": 0.9968,
+ "step": 926,
+ "time": 14.36
+ },
+ {
+ "epoch": 0.89,
+ "learning_rate": "1.9816e-04",
+ "loss": 0.966,
+ "slid_loss": 0.9967,
+ "step": 927,
+ "time": 13.96
+ },
+ {
+ "epoch": 0.89,
+ "learning_rate": "1.9815e-04",
+ "loss": 0.9077,
+ "slid_loss": 0.9948,
+ "step": 928,
+ "time": 13.91
+ },
+ {
+ "epoch": 0.89,
+ "learning_rate": "1.9814e-04",
+ "loss": 1.0008,
+ "slid_loss": 0.9947,
+ "step": 929,
+ "time": 12.87
+ },
+ {
+ "epoch": 0.89,
+ "learning_rate": "1.9813e-04",
+ "loss": 0.9972,
+ "slid_loss": 0.9947,
+ "step": 930,
+ "time": 13.14
+ },
+ {
+ "epoch": 0.89,
+ "learning_rate": "1.9812e-04",
+ "loss": 1.0144,
+ "slid_loss": 0.994,
+ "step": 931,
+ "time": 13.88
+ },
+ {
+ "epoch": 0.9,
+ "learning_rate": "1.9811e-04",
+ "loss": 1.0171,
+ "slid_loss": 0.9943,
+ "step": 932,
+ "time": 12.88
+ },
+ {
+ "epoch": 0.9,
+ "learning_rate": "1.9810e-04",
+ "loss": 0.941,
+ "slid_loss": 0.9938,
+ "step": 933,
+ "time": 13.12
+ },
+ {
+ "epoch": 0.9,
+ "learning_rate": "1.9809e-04",
+ "loss": 1.0739,
+ "slid_loss": 0.9944,
+ "step": 934,
+ "time": 14.38
+ },
+ {
+ "epoch": 0.9,
+ "learning_rate": "1.9808e-04",
+ "loss": 0.8783,
+ "slid_loss": 0.9922,
+ "step": 935,
+ "time": 11.91
+ },
+ {
+ "epoch": 0.9,
+ "learning_rate": "1.9808e-04",
+ "loss": 0.9827,
+ "slid_loss": 0.992,
+ "step": 936,
+ "time": 14.06
+ },
+ {
+ "epoch": 0.9,
+ "learning_rate": "1.9807e-04",
+ "loss": 0.973,
+ "slid_loss": 0.992,
+ "step": 937,
+ "time": 13.76
+ },
+ {
+ "epoch": 0.9,
+ "learning_rate": "1.9806e-04",
+ "loss": 0.9081,
+ "slid_loss": 0.991,
+ "step": 938,
+ "time": 13.2
+ },
+ {
+ "epoch": 0.9,
+ "learning_rate": "1.9805e-04",
+ "loss": 1.0273,
+ "slid_loss": 0.992,
+ "step": 939,
+ "time": 13.95
+ },
+ {
+ "epoch": 0.9,
+ "learning_rate": "1.9804e-04",
+ "loss": 1.08,
+ "slid_loss": 0.9917,
+ "step": 940,
+ "time": 12.28
+ },
+ {
+ "epoch": 0.9,
+ "learning_rate": "1.9803e-04",
+ "loss": 0.9241,
+ "slid_loss": 0.9909,
+ "step": 941,
+ "time": 11.46
+ },
+ {
+ "epoch": 0.9,
+ "learning_rate": "1.9802e-04",
+ "loss": 1.0113,
+ "slid_loss": 0.9904,
+ "step": 942,
+ "time": 13.27
+ },
+ {
+ "epoch": 0.91,
+ "learning_rate": "1.9801e-04",
+ "loss": 0.9796,
+ "slid_loss": 0.9896,
+ "step": 943,
+ "time": 14.52
+ },
+ {
+ "epoch": 0.91,
+ "learning_rate": "1.9800e-04",
+ "loss": 0.9488,
+ "slid_loss": 0.9892,
+ "step": 944,
+ "time": 13.68
+ },
+ {
+ "epoch": 0.91,
+ "learning_rate": "1.9799e-04",
+ "loss": 0.924,
+ "slid_loss": 0.9877,
+ "step": 945,
+ "time": 12.8
+ },
+ {
+ "epoch": 0.91,
+ "learning_rate": "1.9798e-04",
+ "loss": 1.1244,
+ "slid_loss": 0.9877,
+ "step": 946,
+ "time": 13.64
+ },
+ {
+ "epoch": 0.91,
+ "learning_rate": "1.9797e-04",
+ "loss": 1.0146,
+ "slid_loss": 0.9877,
+ "step": 947,
+ "time": 13.15
+ },
+ {
+ "epoch": 0.91,
+ "learning_rate": "1.9796e-04",
+ "loss": 0.9195,
+ "slid_loss": 0.9864,
+ "step": 948,
+ "time": 13.7
+ },
+ {
+ "epoch": 0.91,
+ "learning_rate": "1.9795e-04",
+ "loss": 0.9465,
+ "slid_loss": 0.986,
+ "step": 949,
+ "time": 14.5
+ },
+ {
+ "epoch": 0.91,
+ "learning_rate": "1.9794e-04",
+ "loss": 0.8867,
+ "slid_loss": 0.9844,
+ "step": 950,
+ "time": 14.26
+ },
+ {
+ "epoch": 0.91,
+ "learning_rate": "1.9793e-04",
+ "loss": 1.002,
+ "slid_loss": 0.9851,
+ "step": 951,
+ "time": 13.3
+ },
+ {
+ "epoch": 0.91,
+ "learning_rate": "1.9793e-04",
+ "loss": 0.9565,
+ "slid_loss": 0.9834,
+ "step": 952,
+ "time": 12.94
+ },
+ {
+ "epoch": 0.92,
+ "learning_rate": "1.9792e-04",
+ "loss": 0.9462,
+ "slid_loss": 0.9831,
+ "step": 953,
+ "time": 12.84
+ },
+ {
+ "epoch": 0.92,
+ "learning_rate": "1.9791e-04",
+ "loss": 0.996,
+ "slid_loss": 0.9837,
+ "step": 954,
+ "time": 13.41
+ },
+ {
+ "epoch": 0.92,
+ "learning_rate": "1.9790e-04",
+ "loss": 0.9394,
+ "slid_loss": 0.9828,
+ "step": 955,
+ "time": 13.21
+ },
+ {
+ "epoch": 0.92,
+ "learning_rate": "1.9789e-04",
+ "loss": 0.9914,
+ "slid_loss": 0.9832,
+ "step": 956,
+ "time": 13.42
+ },
+ {
+ "epoch": 0.92,
+ "learning_rate": "1.9788e-04",
+ "loss": 0.843,
+ "slid_loss": 0.9822,
+ "step": 957,
+ "time": 12.82
+ },
+ {
+ "epoch": 0.92,
+ "learning_rate": "1.9787e-04",
+ "loss": 0.9026,
+ "slid_loss": 0.9819,
+ "step": 958,
+ "time": 13.03
+ },
+ {
+ "epoch": 0.92,
+ "learning_rate": "1.9786e-04",
+ "loss": 0.9919,
+ "slid_loss": 0.9822,
+ "step": 959,
+ "time": 12.68
+ },
+ {
+ "epoch": 0.92,
+ "learning_rate": "1.9785e-04",
+ "loss": 1.0591,
+ "slid_loss": 0.9823,
+ "step": 960,
+ "time": 13.94
+ },
+ {
+ "epoch": 0.92,
+ "learning_rate": "1.9784e-04",
+ "loss": 0.9357,
+ "slid_loss": 0.982,
+ "step": 961,
+ "time": 13.3
+ },
+ {
+ "epoch": 0.92,
+ "learning_rate": "1.9783e-04",
+ "loss": 0.9297,
+ "slid_loss": 0.9819,
+ "step": 962,
+ "time": 12.21
+ },
+ {
+ "epoch": 0.93,
+ "learning_rate": "1.9782e-04",
+ "loss": 1.0105,
+ "slid_loss": 0.9829,
+ "step": 963,
+ "time": 12.44
+ },
+ {
+ "epoch": 0.93,
+ "learning_rate": "1.9781e-04",
+ "loss": 1.0013,
+ "slid_loss": 0.983,
+ "step": 964,
+ "time": 14.48
+ },
+ {
+ "epoch": 0.93,
+ "learning_rate": "1.9780e-04",
+ "loss": 0.967,
+ "slid_loss": 0.9822,
+ "step": 965,
+ "time": 13.09
+ },
+ {
+ "epoch": 0.93,
+ "learning_rate": "1.9779e-04",
+ "loss": 1.0322,
+ "slid_loss": 0.9823,
+ "step": 966,
+ "time": 13.75
+ },
+ {
+ "epoch": 0.93,
+ "learning_rate": "1.9778e-04",
+ "loss": 0.9993,
+ "slid_loss": 0.9836,
+ "step": 967,
+ "time": 13.18
+ },
+ {
+ "epoch": 0.93,
+ "learning_rate": "1.9777e-04",
+ "loss": 0.9493,
+ "slid_loss": 0.9823,
+ "step": 968,
+ "time": 13.09
+ },
+ {
+ "epoch": 0.93,
+ "learning_rate": "1.9776e-04",
+ "loss": 0.8998,
+ "slid_loss": 0.9818,
+ "step": 969,
+ "time": 13.85
+ },
+ {
+ "epoch": 0.93,
+ "learning_rate": "1.9775e-04",
+ "loss": 0.9338,
+ "slid_loss": 0.9807,
+ "step": 970,
+ "time": 13.28
+ },
+ {
+ "epoch": 0.93,
+ "learning_rate": "1.9774e-04",
+ "loss": 1.0208,
+ "slid_loss": 0.9816,
+ "step": 971,
+ "time": 13.8
+ },
+ {
+ "epoch": 0.93,
+ "learning_rate": "1.9773e-04",
+ "loss": 0.956,
+ "slid_loss": 0.9814,
+ "step": 972,
+ "time": 13.25
+ },
+ {
+ "epoch": 0.93,
+ "learning_rate": "1.9772e-04",
+ "loss": 0.9816,
+ "slid_loss": 0.9818,
+ "step": 973,
+ "time": 13.86
+ },
+ {
+ "epoch": 0.94,
+ "learning_rate": "1.9771e-04",
+ "loss": 0.921,
+ "slid_loss": 0.9806,
+ "step": 974,
+ "time": 12.45
+ },
+ {
+ "epoch": 0.94,
+ "learning_rate": "1.9770e-04",
+ "loss": 0.9402,
+ "slid_loss": 0.9812,
+ "step": 975,
+ "time": 14.01
+ },
+ {
+ "epoch": 0.94,
+ "learning_rate": "1.9769e-04",
+ "loss": 0.8464,
+ "slid_loss": 0.9794,
+ "step": 976,
+ "time": 13.15
+ },
+ {
+ "epoch": 0.94,
+ "learning_rate": "1.9768e-04",
+ "loss": 0.9949,
+ "slid_loss": 0.9792,
+ "step": 977,
+ "time": 13.7
+ },
+ {
+ "epoch": 0.94,
+ "learning_rate": "1.9767e-04",
+ "loss": 0.9473,
+ "slid_loss": 0.9793,
+ "step": 978,
+ "time": 11.32
+ },
+ {
+ "epoch": 0.94,
+ "learning_rate": "1.9766e-04",
+ "loss": 1.0231,
+ "slid_loss": 0.9782,
+ "step": 979,
+ "time": 13.93
+ },
+ {
+ "epoch": 0.94,
+ "learning_rate": "1.9765e-04",
+ "loss": 0.9871,
+ "slid_loss": 0.9783,
+ "step": 980,
+ "time": 13.42
+ },
+ {
+ "epoch": 0.94,
+ "learning_rate": "1.9764e-04",
+ "loss": 1.0541,
+ "slid_loss": 0.9785,
+ "step": 981,
+ "time": 13.23
+ },
+ {
+ "epoch": 0.94,
+ "learning_rate": "1.9763e-04",
+ "loss": 0.916,
+ "slid_loss": 0.9777,
+ "step": 982,
+ "time": 12.0
+ },
+ {
+ "epoch": 0.94,
+ "learning_rate": "1.9762e-04",
+ "loss": 0.937,
+ "slid_loss": 0.9775,
+ "step": 983,
+ "time": 12.75
+ },
+ {
+ "epoch": 0.95,
+ "learning_rate": "1.9761e-04",
+ "loss": 0.9628,
+ "slid_loss": 0.9764,
+ "step": 984,
+ "time": 13.37
+ },
+ {
+ "epoch": 0.95,
+ "learning_rate": "1.9760e-04",
+ "loss": 1.1171,
+ "slid_loss": 0.9775,
+ "step": 985,
+ "time": 13.55
+ },
+ {
+ "epoch": 0.95,
+ "learning_rate": "1.9759e-04",
+ "loss": 1.032,
+ "slid_loss": 0.977,
+ "step": 986,
+ "time": 12.24
+ },
+ {
+ "epoch": 0.95,
+ "learning_rate": "1.9758e-04",
+ "loss": 0.97,
+ "slid_loss": 0.9763,
+ "step": 987,
+ "time": 14.3
+ },
+ {
+ "epoch": 0.95,
+ "learning_rate": "1.9757e-04",
+ "loss": 0.9148,
+ "slid_loss": 0.976,
+ "step": 988,
+ "time": 11.34
+ },
+ {
+ "epoch": 0.95,
+ "learning_rate": "1.9756e-04",
+ "loss": 0.9893,
+ "slid_loss": 0.9762,
+ "step": 989,
+ "time": 13.78
+ },
+ {
+ "epoch": 0.95,
+ "learning_rate": "1.9755e-04",
+ "loss": 1.0352,
+ "slid_loss": 0.9768,
+ "step": 990,
+ "time": 13.91
+ },
+ {
+ "epoch": 0.95,
+ "learning_rate": "1.9754e-04",
+ "loss": 1.0895,
+ "slid_loss": 0.9769,
+ "step": 991,
+ "time": 13.63
+ },
+ {
+ "epoch": 0.95,
+ "learning_rate": "1.9753e-04",
+ "loss": 1.0234,
+ "slid_loss": 0.9772,
+ "step": 992,
+ "time": 13.62
+ },
+ {
+ "epoch": 0.95,
+ "learning_rate": "1.9752e-04",
+ "loss": 0.8328,
+ "slid_loss": 0.9749,
+ "step": 993,
+ "time": 13.77
+ },
+ {
+ "epoch": 0.95,
+ "learning_rate": "1.9750e-04",
+ "loss": 0.9009,
+ "slid_loss": 0.9742,
+ "step": 994,
+ "time": 11.7
+ },
+ {
+ "epoch": 0.96,
+ "learning_rate": "1.9749e-04",
+ "loss": 0.9438,
+ "slid_loss": 0.9745,
+ "step": 995,
+ "time": 13.54
+ },
+ {
+ "epoch": 0.96,
+ "learning_rate": "1.9748e-04",
+ "loss": 1.0395,
+ "slid_loss": 0.9756,
+ "step": 996,
+ "time": 12.84
+ },
+ {
+ "epoch": 0.96,
+ "learning_rate": "1.9747e-04",
+ "loss": 0.9603,
+ "slid_loss": 0.9755,
+ "step": 997,
+ "time": 13.33
+ },
+ {
+ "epoch": 0.96,
+ "learning_rate": "1.9746e-04",
+ "loss": 1.0074,
+ "slid_loss": 0.9769,
+ "step": 998,
+ "time": 12.81
+ },
+ {
+ "epoch": 0.96,
+ "learning_rate": "1.9745e-04",
+ "loss": 1.0163,
+ "slid_loss": 0.9758,
+ "step": 999,
+ "time": 10.79
+ },
+ {
+ "epoch": 0.96,
+ "learning_rate": "1.9744e-04",
+ "loss": 0.9061,
+ "slid_loss": 0.9748,
+ "step": 1000,
+ "time": 13.43
+ },
+ {
+ "epoch": 0.96,
+ "learning_rate": "1.9743e-04",
+ "loss": 0.92,
+ "slid_loss": 0.9746,
+ "step": 1001,
+ "time": 12.93
+ },
+ {
+ "epoch": 0.96,
+ "learning_rate": "1.9742e-04",
+ "loss": 1.0226,
+ "slid_loss": 0.9757,
+ "step": 1002,
+ "time": 13.41
+ },
+ {
+ "epoch": 0.96,
+ "learning_rate": "1.9741e-04",
+ "loss": 1.0246,
+ "slid_loss": 0.976,
+ "step": 1003,
+ "time": 11.67
+ },
+ {
+ "epoch": 0.96,
+ "learning_rate": "1.9740e-04",
+ "loss": 0.9364,
+ "slid_loss": 0.9757,
+ "step": 1004,
+ "time": 11.66
+ },
+ {
+ "epoch": 0.97,
+ "learning_rate": "1.9739e-04",
+ "loss": 0.997,
+ "slid_loss": 0.977,
+ "step": 1005,
+ "time": 13.38
+ },
+ {
+ "epoch": 0.97,
+ "learning_rate": "1.9738e-04",
+ "loss": 0.9579,
+ "slid_loss": 0.9764,
+ "step": 1006,
+ "time": 13.57
+ },
+ {
+ "epoch": 0.97,
+ "learning_rate": "1.9737e-04",
+ "loss": 0.932,
+ "slid_loss": 0.9766,
+ "step": 1007,
+ "time": 13.15
+ },
+ {
+ "epoch": 0.97,
+ "learning_rate": "1.9736e-04",
+ "loss": 1.0112,
+ "slid_loss": 0.9774,
+ "step": 1008,
+ "time": 13.82
+ },
+ {
+ "epoch": 0.97,
+ "learning_rate": "1.9735e-04",
+ "loss": 1.0112,
+ "slid_loss": 0.9779,
+ "step": 1009,
+ "time": 13.48
+ },
+ {
+ "epoch": 0.97,
+ "learning_rate": "1.9733e-04",
+ "loss": 1.0069,
+ "slid_loss": 0.9785,
+ "step": 1010,
+ "time": 13.25
+ },
+ {
+ "epoch": 0.97,
+ "learning_rate": "1.9732e-04",
+ "loss": 0.9538,
+ "slid_loss": 0.9774,
+ "step": 1011,
+ "time": 14.06
+ },
+ {
+ "epoch": 0.97,
+ "learning_rate": "1.9731e-04",
+ "loss": 0.8669,
+ "slid_loss": 0.9759,
+ "step": 1012,
+ "time": 13.4
+ },
+ {
+ "epoch": 0.97,
+ "learning_rate": "1.9730e-04",
+ "loss": 0.9567,
+ "slid_loss": 0.9761,
+ "step": 1013,
+ "time": 12.22
+ },
+ {
+ "epoch": 0.97,
+ "learning_rate": "1.9729e-04",
+ "loss": 1.0478,
+ "slid_loss": 0.9762,
+ "step": 1014,
+ "time": 11.7
+ },
+ {
+ "epoch": 0.98,
+ "learning_rate": "1.9728e-04",
+ "loss": 0.9359,
+ "slid_loss": 0.9747,
+ "step": 1015,
+ "time": 13.29
+ },
+ {
+ "epoch": 0.98,
+ "learning_rate": "1.9727e-04",
+ "loss": 0.9848,
+ "slid_loss": 0.9744,
+ "step": 1016,
+ "time": 10.49
+ },
+ {
+ "epoch": 0.98,
+ "learning_rate": "1.9726e-04",
+ "loss": 0.9545,
+ "slid_loss": 0.9741,
+ "step": 1017,
+ "time": 13.67
+ },
+ {
+ "epoch": 0.98,
+ "learning_rate": "1.9725e-04",
+ "loss": 1.0427,
+ "slid_loss": 0.974,
+ "step": 1018,
+ "time": 12.23
+ },
+ {
+ "epoch": 0.98,
+ "learning_rate": "1.9724e-04",
+ "loss": 1.0048,
+ "slid_loss": 0.9735,
+ "step": 1019,
+ "time": 14.07
+ },
+ {
+ "epoch": 0.98,
+ "learning_rate": "1.9723e-04",
+ "loss": 0.9756,
+ "slid_loss": 0.9737,
+ "step": 1020,
+ "time": 11.78
+ },
+ {
+ "epoch": 0.98,
+ "learning_rate": "1.9721e-04",
+ "loss": 1.0282,
+ "slid_loss": 0.9738,
+ "step": 1021,
+ "time": 12.91
+ },
+ {
+ "epoch": 0.98,
+ "learning_rate": "1.9720e-04",
+ "loss": 1.1092,
+ "slid_loss": 0.9757,
+ "step": 1022,
+ "time": 13.48
+ },
+ {
+ "epoch": 0.98,
+ "learning_rate": "1.9719e-04",
+ "loss": 0.998,
+ "slid_loss": 0.9769,
+ "step": 1023,
+ "time": 14.11
+ },
+ {
+ "epoch": 0.98,
+ "learning_rate": "1.9718e-04",
+ "loss": 0.9313,
+ "slid_loss": 0.9752,
+ "step": 1024,
+ "time": 13.15
+ },
+ {
+ "epoch": 0.98,
+ "learning_rate": "1.9717e-04",
+ "loss": 0.899,
+ "slid_loss": 0.9746,
+ "step": 1025,
+ "time": 13.8
+ },
+ {
+ "epoch": 0.99,
+ "learning_rate": "1.9716e-04",
+ "loss": 0.9183,
+ "slid_loss": 0.9743,
+ "step": 1026,
+ "time": 13.37
+ },
+ {
+ "epoch": 0.99,
+ "learning_rate": "1.9715e-04",
+ "loss": 1.0916,
+ "slid_loss": 0.9755,
+ "step": 1027,
+ "time": 13.83
+ },
+ {
+ "epoch": 0.99,
+ "learning_rate": "1.9714e-04",
+ "loss": 1.0198,
+ "slid_loss": 0.9767,
+ "step": 1028,
+ "time": 13.39
+ },
+ {
+ "epoch": 0.99,
+ "learning_rate": "1.9713e-04",
+ "loss": 0.8994,
+ "slid_loss": 0.9756,
+ "step": 1029,
+ "time": 12.52
+ },
+ {
+ "epoch": 0.99,
+ "learning_rate": "1.9711e-04",
+ "loss": 1.0132,
+ "slid_loss": 0.9758,
+ "step": 1030,
+ "time": 12.58
+ },
+ {
+ "epoch": 0.99,
+ "learning_rate": "1.9710e-04",
+ "loss": 1.0797,
+ "slid_loss": 0.9765,
+ "step": 1031,
+ "time": 13.81
+ },
+ {
+ "epoch": 0.99,
+ "learning_rate": "1.9709e-04",
+ "loss": 0.9583,
+ "slid_loss": 0.9759,
+ "step": 1032,
+ "time": 11.87
+ },
+ {
+ "epoch": 0.99,
+ "learning_rate": "1.9708e-04",
+ "loss": 0.9373,
+ "slid_loss": 0.9758,
+ "step": 1033,
+ "time": 13.53
+ },
+ {
+ "epoch": 0.99,
+ "learning_rate": "1.9707e-04",
+ "loss": 0.95,
+ "slid_loss": 0.9746,
+ "step": 1034,
+ "time": 13.42
+ },
+ {
+ "epoch": 0.99,
+ "learning_rate": "1.9706e-04",
+ "loss": 0.9967,
+ "slid_loss": 0.9758,
+ "step": 1035,
+ "time": 12.74
+ },
+ {
+ "epoch": 1.0,
+ "learning_rate": "1.9705e-04",
+ "loss": 1.0089,
+ "slid_loss": 0.976,
+ "step": 1036,
+ "time": 12.37
+ },
+ {
+ "epoch": 1.0,
+ "learning_rate": "1.9704e-04",
+ "loss": 1.0543,
+ "slid_loss": 0.9768,
+ "step": 1037,
+ "time": 13.47
+ },
+ {
+ "epoch": 1.0,
+ "learning_rate": "1.9702e-04",
+ "loss": 0.9265,
+ "slid_loss": 0.977,
+ "step": 1038,
+ "time": 13.38
+ },
+ {
+ "epoch": 1.0,
+ "learning_rate": "1.9701e-04",
+ "loss": 1.0738,
+ "slid_loss": 0.9775,
+ "step": 1039,
+ "time": 12.14
+ },
+ {
+ "epoch": 1.0,
+ "learning_rate": "1.9700e-04",
+ "loss": 1.0418,
+ "slid_loss": 0.9771,
+ "step": 1040,
+ "time": 13.68
+ },
+ {
+ "epoch": 1.0,
+ "learning_rate": "1.9699e-04",
+ "loss": 0.8768,
+ "slid_loss": 0.9766,
+ "step": 1041,
+ "time": 14.16
+ },
+ {
+ "epoch": 1.0,
+ "learning_rate": "1.9698e-04",
+ "loss": 0.9259,
+ "slid_loss": 0.9758,
+ "step": 1042,
+ "time": 174.68
+ },
+ {
+ "epoch": 1.0,
+ "learning_rate": "1.9697e-04",
+ "loss": 0.9491,
+ "slid_loss": 0.9755,
+ "step": 1043,
+ "time": 11.21
+ },
+ {
+ "epoch": 1.0,
+ "learning_rate": "1.9696e-04",
+ "loss": 0.9508,
+ "slid_loss": 0.9755,
+ "step": 1044,
+ "time": 11.52
+ },
+ {
+ "epoch": 1.0,
+ "learning_rate": "1.9694e-04",
+ "loss": 0.9902,
+ "slid_loss": 0.9762,
+ "step": 1045,
+ "time": 13.92
+ },
+ {
+ "epoch": 1.0,
+ "learning_rate": "1.9693e-04",
+ "loss": 0.9966,
+ "slid_loss": 0.9749,
+ "step": 1046,
+ "time": 13.6
+ },
+ {
+ "epoch": 1.01,
+ "learning_rate": "1.9692e-04",
+ "loss": 0.9123,
+ "slid_loss": 0.9739,
+ "step": 1047,
+ "time": 11.33
+ },
+ {
+ "epoch": 1.01,
+ "learning_rate": "1.9691e-04",
+ "loss": 0.9404,
+ "slid_loss": 0.9741,
+ "step": 1048,
+ "time": 11.54
+ },
+ {
+ "epoch": 1.01,
+ "learning_rate": "1.9690e-04",
+ "loss": 0.8762,
+ "slid_loss": 0.9734,
+ "step": 1049,
+ "time": 11.54
+ },
+ {
+ "epoch": 1.01,
+ "learning_rate": "1.9689e-04",
+ "loss": 0.9187,
+ "slid_loss": 0.9737,
+ "step": 1050,
+ "time": 14.14
+ },
+ {
+ "epoch": 1.01,
+ "learning_rate": "1.9687e-04",
+ "loss": 0.8716,
+ "slid_loss": 0.9724,
+ "step": 1051,
+ "time": 13.26
+ },
+ {
+ "epoch": 1.01,
+ "learning_rate": "1.9686e-04",
+ "loss": 0.9457,
+ "slid_loss": 0.9723,
+ "step": 1052,
+ "time": 12.89
+ },
+ {
+ "epoch": 1.01,
+ "learning_rate": "1.9685e-04",
+ "loss": 0.9017,
+ "slid_loss": 0.9718,
+ "step": 1053,
+ "time": 13.47
+ },
+ {
+ "epoch": 1.01,
+ "learning_rate": "1.9684e-04",
+ "loss": 0.9662,
+ "slid_loss": 0.9715,
+ "step": 1054,
+ "time": 14.36
+ },
+ {
+ "epoch": 1.01,
+ "learning_rate": "1.9683e-04",
+ "loss": 0.9353,
+ "slid_loss": 0.9715,
+ "step": 1055,
+ "time": 12.54
+ },
+ {
+ "epoch": 1.01,
+ "learning_rate": "1.9682e-04",
+ "loss": 0.9734,
+ "slid_loss": 0.9713,
+ "step": 1056,
+ "time": 12.47
+ },
+ {
+ "epoch": 1.02,
+ "learning_rate": "1.9680e-04",
+ "loss": 1.0055,
+ "slid_loss": 0.9729,
+ "step": 1057,
+ "time": 14.32
+ },
+ {
+ "epoch": 1.02,
+ "learning_rate": "1.9679e-04",
+ "loss": 0.8676,
+ "slid_loss": 0.9726,
+ "step": 1058,
+ "time": 13.74
+ },
+ {
+ "epoch": 1.02,
+ "learning_rate": "1.9678e-04",
+ "loss": 0.9367,
+ "slid_loss": 0.972,
+ "step": 1059,
+ "time": 13.19
+ },
+ {
+ "epoch": 1.02,
+ "learning_rate": "1.9677e-04",
+ "loss": 1.0135,
+ "slid_loss": 0.9716,
+ "step": 1060,
+ "time": 13.2
+ },
+ {
+ "epoch": 1.02,
+ "learning_rate": "1.9676e-04",
+ "loss": 0.9528,
+ "slid_loss": 0.9717,
+ "step": 1061,
+ "time": 11.01
+ },
+ {
+ "epoch": 1.02,
+ "learning_rate": "1.9674e-04",
+ "loss": 0.892,
+ "slid_loss": 0.9714,
+ "step": 1062,
+ "time": 13.42
+ },
+ {
+ "epoch": 1.02,
+ "learning_rate": "1.9673e-04",
+ "loss": 0.9255,
+ "slid_loss": 0.9705,
+ "step": 1063,
+ "time": 11.41
+ },
+ {
+ "epoch": 1.02,
+ "learning_rate": "1.9672e-04",
+ "loss": 0.9277,
+ "slid_loss": 0.9698,
+ "step": 1064,
+ "time": 14.23
+ },
+ {
+ "epoch": 1.02,
+ "learning_rate": "1.9671e-04",
+ "loss": 1.1172,
+ "slid_loss": 0.9713,
+ "step": 1065,
+ "time": 13.51
+ },
+ {
+ "epoch": 1.02,
+ "learning_rate": "1.9670e-04",
+ "loss": 0.9519,
+ "slid_loss": 0.9705,
+ "step": 1066,
+ "time": 13.71
+ },
+ {
+ "epoch": 1.02,
+ "learning_rate": "1.9668e-04",
+ "loss": 1.0635,
+ "slid_loss": 0.9711,
+ "step": 1067,
+ "time": 14.01
+ },
+ {
+ "epoch": 1.03,
+ "learning_rate": "1.9667e-04",
+ "loss": 1.0054,
+ "slid_loss": 0.9717,
+ "step": 1068,
+ "time": 12.75
+ },
+ {
+ "epoch": 1.03,
+ "learning_rate": "1.9666e-04",
+ "loss": 1.0559,
+ "slid_loss": 0.9732,
+ "step": 1069,
+ "time": 13.33
+ },
+ {
+ "epoch": 1.03,
+ "learning_rate": "1.9665e-04",
+ "loss": 1.0143,
+ "slid_loss": 0.9741,
+ "step": 1070,
+ "time": 14.09
+ },
+ {
+ "epoch": 1.03,
+ "learning_rate": "1.9664e-04",
+ "loss": 1.0064,
+ "slid_loss": 0.9739,
+ "step": 1071,
+ "time": 12.53
+ },
+ {
+ "epoch": 1.03,
+ "learning_rate": "1.9662e-04",
+ "loss": 1.0137,
+ "slid_loss": 0.9745,
+ "step": 1072,
+ "time": 13.72
+ },
+ {
+ "epoch": 1.03,
+ "learning_rate": "1.9661e-04",
+ "loss": 0.9903,
+ "slid_loss": 0.9746,
+ "step": 1073,
+ "time": 13.44
+ },
+ {
+ "epoch": 1.03,
+ "learning_rate": "1.9660e-04",
+ "loss": 0.9629,
+ "slid_loss": 0.975,
+ "step": 1074,
+ "time": 13.63
+ },
+ {
+ "epoch": 1.03,
+ "learning_rate": "1.9659e-04",
+ "loss": 0.9543,
+ "slid_loss": 0.9751,
+ "step": 1075,
+ "time": 12.97
+ },
+ {
+ "epoch": 1.03,
+ "learning_rate": "1.9658e-04",
+ "loss": 0.9168,
+ "slid_loss": 0.9758,
+ "step": 1076,
+ "time": 11.62
+ },
+ {
+ "epoch": 1.03,
+ "learning_rate": "1.9656e-04",
+ "loss": 0.9888,
+ "slid_loss": 0.9758,
+ "step": 1077,
+ "time": 13.63
+ },
+ {
+ "epoch": 1.04,
+ "learning_rate": "1.9655e-04",
+ "loss": 0.8354,
+ "slid_loss": 0.9747,
+ "step": 1078,
+ "time": 13.72
+ },
+ {
+ "epoch": 1.04,
+ "learning_rate": "1.9654e-04",
+ "loss": 0.9042,
+ "slid_loss": 0.9735,
+ "step": 1079,
+ "time": 12.41
+ },
+ {
+ "epoch": 1.04,
+ "learning_rate": "1.9653e-04",
+ "loss": 1.0398,
+ "slid_loss": 0.974,
+ "step": 1080,
+ "time": 13.93
+ },
+ {
+ "epoch": 1.04,
+ "learning_rate": "1.9651e-04",
+ "loss": 1.0013,
+ "slid_loss": 0.9735,
+ "step": 1081,
+ "time": 13.17
+ },
+ {
+ "epoch": 1.04,
+ "learning_rate": "1.9650e-04",
+ "loss": 1.0264,
+ "slid_loss": 0.9746,
+ "step": 1082,
+ "time": 13.16
+ },
+ {
+ "epoch": 1.04,
+ "learning_rate": "1.9649e-04",
+ "loss": 0.9633,
+ "slid_loss": 0.9748,
+ "step": 1083,
+ "time": 13.34
+ },
+ {
+ "epoch": 1.04,
+ "learning_rate": "1.9648e-04",
+ "loss": 1.0064,
+ "slid_loss": 0.9753,
+ "step": 1084,
+ "time": 12.8
+ },
+ {
+ "epoch": 1.04,
+ "learning_rate": "1.9647e-04",
+ "loss": 0.9201,
+ "slid_loss": 0.9733,
+ "step": 1085,
+ "time": 13.65
+ },
+ {
+ "epoch": 1.04,
+ "learning_rate": "1.9645e-04",
+ "loss": 0.948,
+ "slid_loss": 0.9725,
+ "step": 1086,
+ "time": 14.32
+ },
+ {
+ "epoch": 1.04,
+ "learning_rate": "1.9644e-04",
+ "loss": 0.9234,
+ "slid_loss": 0.972,
+ "step": 1087,
+ "time": 12.87
+ },
+ {
+ "epoch": 1.05,
+ "learning_rate": "1.9643e-04",
+ "loss": 1.0118,
+ "slid_loss": 0.973,
+ "step": 1088,
+ "time": 12.84
+ },
+ {
+ "epoch": 1.05,
+ "learning_rate": "1.9642e-04",
+ "loss": 0.8761,
+ "slid_loss": 0.9718,
+ "step": 1089,
+ "time": 12.36
+ },
+ {
+ "epoch": 1.05,
+ "learning_rate": "1.9640e-04",
+ "loss": 1.0149,
+ "slid_loss": 0.9716,
+ "step": 1090,
+ "time": 13.18
+ },
+ {
+ "epoch": 1.05,
+ "learning_rate": "1.9639e-04",
+ "loss": 1.0226,
+ "slid_loss": 0.971,
+ "step": 1091,
+ "time": 13.81
+ },
+ {
+ "epoch": 1.05,
+ "learning_rate": "1.9638e-04",
+ "loss": 0.9232,
+ "slid_loss": 0.97,
+ "step": 1092,
+ "time": 14.08
+ },
+ {
+ "epoch": 1.05,
+ "learning_rate": "1.9637e-04",
+ "loss": 0.9565,
+ "slid_loss": 0.9712,
+ "step": 1093,
+ "time": 14.31
+ },
+ {
+ "epoch": 1.05,
+ "learning_rate": "1.9635e-04",
+ "loss": 0.9235,
+ "slid_loss": 0.9714,
+ "step": 1094,
+ "time": 13.33
+ },
+ {
+ "epoch": 1.05,
+ "learning_rate": "1.9634e-04",
+ "loss": 0.9145,
+ "slid_loss": 0.9711,
+ "step": 1095,
+ "time": 13.47
+ },
+ {
+ "epoch": 1.05,
+ "learning_rate": "1.9633e-04",
+ "loss": 0.9809,
+ "slid_loss": 0.9705,
+ "step": 1096,
+ "time": 11.25
+ },
+ {
+ "epoch": 1.05,
+ "learning_rate": "1.9631e-04",
+ "loss": 0.8829,
+ "slid_loss": 0.9698,
+ "step": 1097,
+ "time": 13.15
+ },
+ {
+ "epoch": 1.05,
+ "learning_rate": "1.9630e-04",
+ "loss": 0.8922,
+ "slid_loss": 0.9686,
+ "step": 1098,
+ "time": 13.14
+ },
+ {
+ "epoch": 1.06,
+ "learning_rate": "1.9629e-04",
+ "loss": 0.8461,
+ "slid_loss": 0.9669,
+ "step": 1099,
+ "time": 11.99
+ },
+ {
+ "epoch": 1.06,
+ "learning_rate": "1.9628e-04",
+ "loss": 0.8706,
+ "slid_loss": 0.9666,
+ "step": 1100,
+ "time": 13.85
+ },
+ {
+ "epoch": 1.06,
+ "learning_rate": "1.9626e-04",
+ "loss": 0.8988,
+ "slid_loss": 0.9663,
+ "step": 1101,
+ "time": 13.05
+ },
+ {
+ "epoch": 1.06,
+ "learning_rate": "1.9625e-04",
+ "loss": 0.8927,
+ "slid_loss": 0.965,
+ "step": 1102,
+ "time": 11.57
+ },
+ {
+ "epoch": 1.06,
+ "learning_rate": "1.9624e-04",
+ "loss": 0.8794,
+ "slid_loss": 0.9636,
+ "step": 1103,
+ "time": 13.03
+ },
+ {
+ "epoch": 1.06,
+ "learning_rate": "1.9623e-04",
+ "loss": 0.96,
+ "slid_loss": 0.9638,
+ "step": 1104,
+ "time": 13.4
+ },
+ {
+ "epoch": 1.06,
+ "learning_rate": "1.9621e-04",
+ "loss": 1.049,
+ "slid_loss": 0.9644,
+ "step": 1105,
+ "time": 13.93
+ },
+ {
+ "epoch": 1.06,
+ "learning_rate": "1.9620e-04",
+ "loss": 1.0336,
+ "slid_loss": 0.9651,
+ "step": 1106,
+ "time": 11.59
+ },
+ {
+ "epoch": 1.06,
+ "learning_rate": "1.9619e-04",
+ "loss": 1.0636,
+ "slid_loss": 0.9664,
+ "step": 1107,
+ "time": 13.73
+ },
+ {
+ "epoch": 1.06,
+ "learning_rate": "1.9617e-04",
+ "loss": 0.8595,
+ "slid_loss": 0.9649,
+ "step": 1108,
+ "time": 13.4
+ },
+ {
+ "epoch": 1.07,
+ "learning_rate": "1.9616e-04",
+ "loss": 0.9778,
+ "slid_loss": 0.9646,
+ "step": 1109,
+ "time": 13.26
+ },
+ {
+ "epoch": 1.07,
+ "learning_rate": "1.9615e-04",
+ "loss": 0.9504,
+ "slid_loss": 0.964,
+ "step": 1110,
+ "time": 13.24
+ },
+ {
+ "epoch": 1.07,
+ "learning_rate": "1.9614e-04",
+ "loss": 0.9166,
+ "slid_loss": 0.9636,
+ "step": 1111,
+ "time": 13.84
+ },
+ {
+ "epoch": 1.07,
+ "learning_rate": "1.9612e-04",
+ "loss": 1.0352,
+ "slid_loss": 0.9653,
+ "step": 1112,
+ "time": 13.81
+ },
+ {
+ "epoch": 1.07,
+ "learning_rate": "1.9611e-04",
+ "loss": 0.9281,
+ "slid_loss": 0.965,
+ "step": 1113,
+ "time": 13.66
+ },
+ {
+ "epoch": 1.07,
+ "learning_rate": "1.9610e-04",
+ "loss": 0.9452,
+ "slid_loss": 0.964,
+ "step": 1114,
+ "time": 13.72
+ },
+ {
+ "epoch": 1.07,
+ "learning_rate": "1.9608e-04",
+ "loss": 1.0631,
+ "slid_loss": 0.9653,
+ "step": 1115,
+ "time": 10.74
+ },
+ {
+ "epoch": 1.07,
+ "learning_rate": "1.9607e-04",
+ "loss": 0.9703,
+ "slid_loss": 0.9651,
+ "step": 1116,
+ "time": 13.83
+ },
+ {
+ "epoch": 1.07,
+ "learning_rate": "1.9606e-04",
+ "loss": 0.9034,
+ "slid_loss": 0.9646,
+ "step": 1117,
+ "time": 13.09
+ },
+ {
+ "epoch": 1.07,
+ "learning_rate": "1.9605e-04",
+ "loss": 0.9517,
+ "slid_loss": 0.9637,
+ "step": 1118,
+ "time": 12.85
+ },
+ {
+ "epoch": 1.07,
+ "learning_rate": "1.9603e-04",
+ "loss": 0.9328,
+ "slid_loss": 0.963,
+ "step": 1119,
+ "time": 13.23
+ },
+ {
+ "epoch": 1.08,
+ "learning_rate": "1.9602e-04",
+ "loss": 0.9799,
+ "slid_loss": 0.963,
+ "step": 1120,
+ "time": 13.81
+ },
+ {
+ "epoch": 1.08,
+ "learning_rate": "1.9601e-04",
+ "loss": 0.8996,
+ "slid_loss": 0.9618,
+ "step": 1121,
+ "time": 15.25
+ },
+ {
+ "epoch": 1.08,
+ "learning_rate": "1.9599e-04",
+ "loss": 0.856,
+ "slid_loss": 0.9592,
+ "step": 1122,
+ "time": 13.99
+ },
+ {
+ "epoch": 1.08,
+ "learning_rate": "1.9598e-04",
+ "loss": 1.0382,
+ "slid_loss": 0.9596,
+ "step": 1123,
+ "time": 11.43
+ },
+ {
+ "epoch": 1.08,
+ "learning_rate": "1.9597e-04",
+ "loss": 1.0235,
+ "slid_loss": 0.9605,
+ "step": 1124,
+ "time": 13.33
+ },
+ {
+ "epoch": 1.08,
+ "learning_rate": "1.9595e-04",
+ "loss": 0.9985,
+ "slid_loss": 0.9615,
+ "step": 1125,
+ "time": 13.78
+ },
+ {
+ "epoch": 1.08,
+ "learning_rate": "1.9594e-04",
+ "loss": 1.0725,
+ "slid_loss": 0.9631,
+ "step": 1126,
+ "time": 12.37
+ },
+ {
+ "epoch": 1.08,
+ "learning_rate": "1.9593e-04",
+ "loss": 1.043,
+ "slid_loss": 0.9626,
+ "step": 1127,
+ "time": 13.7
+ },
+ {
+ "epoch": 1.08,
+ "learning_rate": "1.9591e-04",
+ "loss": 0.8429,
+ "slid_loss": 0.9608,
+ "step": 1128,
+ "time": 13.49
+ },
+ {
+ "epoch": 1.08,
+ "learning_rate": "1.9590e-04",
+ "loss": 1.0798,
+ "slid_loss": 0.9626,
+ "step": 1129,
+ "time": 13.68
+ },
+ {
+ "epoch": 1.09,
+ "learning_rate": "1.9589e-04",
+ "loss": 0.9815,
+ "slid_loss": 0.9623,
+ "step": 1130,
+ "time": 14.3
+ },
+ {
+ "epoch": 1.09,
+ "learning_rate": "1.9587e-04",
+ "loss": 0.9771,
+ "slid_loss": 0.9613,
+ "step": 1131,
+ "time": 13.41
+ },
+ {
+ "epoch": 1.09,
+ "learning_rate": "1.9586e-04",
+ "loss": 0.973,
+ "slid_loss": 0.9614,
+ "step": 1132,
+ "time": 12.12
+ },
+ {
+ "epoch": 1.09,
+ "learning_rate": "1.9585e-04",
+ "loss": 1.017,
+ "slid_loss": 0.9622,
+ "step": 1133,
+ "time": 12.1
+ },
+ {
+ "epoch": 1.09,
+ "learning_rate": "1.9583e-04",
+ "loss": 0.9913,
+ "slid_loss": 0.9626,
+ "step": 1134,
+ "time": 14.29
+ },
+ {
+ "epoch": 1.09,
+ "learning_rate": "1.9582e-04",
+ "loss": 0.992,
+ "slid_loss": 0.9626,
+ "step": 1135,
+ "time": 13.85
+ },
+ {
+ "epoch": 1.09,
+ "learning_rate": "1.9581e-04",
+ "loss": 0.8917,
+ "slid_loss": 0.9614,
+ "step": 1136,
+ "time": 13.53
+ },
+ {
+ "epoch": 1.09,
+ "learning_rate": "1.9579e-04",
+ "loss": 1.0444,
+ "slid_loss": 0.9613,
+ "step": 1137,
+ "time": 11.6
+ },
+ {
+ "epoch": 1.09,
+ "learning_rate": "1.9578e-04",
+ "loss": 0.992,
+ "slid_loss": 0.962,
+ "step": 1138,
+ "time": 13.44
+ },
+ {
+ "epoch": 1.09,
+ "learning_rate": "1.9577e-04",
+ "loss": 0.8975,
+ "slid_loss": 0.9602,
+ "step": 1139,
+ "time": 13.01
+ },
+ {
+ "epoch": 1.1,
+ "learning_rate": "1.9575e-04",
+ "loss": 0.9197,
+ "slid_loss": 0.959,
+ "step": 1140,
+ "time": 13.78
+ },
+ {
+ "epoch": 1.1,
+ "learning_rate": "1.9574e-04",
+ "loss": 0.9219,
+ "slid_loss": 0.9594,
+ "step": 1141,
+ "time": 13.64
+ },
+ {
+ "epoch": 1.1,
+ "learning_rate": "1.9573e-04",
+ "loss": 0.9325,
+ "slid_loss": 0.9595,
+ "step": 1142,
+ "time": 12.92
+ },
+ {
+ "epoch": 1.1,
+ "learning_rate": "1.9571e-04",
+ "loss": 0.9382,
+ "slid_loss": 0.9594,
+ "step": 1143,
+ "time": 12.89
+ },
+ {
+ "epoch": 1.1,
+ "learning_rate": "1.9570e-04",
+ "loss": 0.9687,
+ "slid_loss": 0.9596,
+ "step": 1144,
+ "time": 10.61
+ },
+ {
+ "epoch": 1.1,
+ "learning_rate": "1.9568e-04",
+ "loss": 0.9943,
+ "slid_loss": 0.9596,
+ "step": 1145,
+ "time": 13.34
+ },
+ {
+ "epoch": 1.1,
+ "learning_rate": "1.9567e-04",
+ "loss": 0.8942,
+ "slid_loss": 0.9586,
+ "step": 1146,
+ "time": 13.59
+ },
+ {
+ "epoch": 1.1,
+ "learning_rate": "1.9566e-04",
+ "loss": 0.8806,
+ "slid_loss": 0.9583,
+ "step": 1147,
+ "time": 13.98
+ },
+ {
+ "epoch": 1.1,
+ "learning_rate": "1.9564e-04",
+ "loss": 1.0034,
+ "slid_loss": 0.9589,
+ "step": 1148,
+ "time": 11.51
+ },
+ {
+ "epoch": 1.1,
+ "learning_rate": "1.9563e-04",
+ "loss": 0.8329,
+ "slid_loss": 0.9585,
+ "step": 1149,
+ "time": 11.54
+ },
+ {
+ "epoch": 1.1,
+ "learning_rate": "1.9562e-04",
+ "loss": 1.064,
+ "slid_loss": 0.9599,
+ "step": 1150,
+ "time": 12.67
+ },
+ {
+ "epoch": 1.11,
+ "learning_rate": "1.9560e-04",
+ "loss": 0.891,
+ "slid_loss": 0.9601,
+ "step": 1151,
+ "time": 13.32
+ },
+ {
+ "epoch": 1.11,
+ "learning_rate": "1.9559e-04",
+ "loss": 0.9212,
+ "slid_loss": 0.9599,
+ "step": 1152,
+ "time": 13.81
+ },
+ {
+ "epoch": 1.11,
+ "learning_rate": "1.9557e-04",
+ "loss": 0.9629,
+ "slid_loss": 0.9605,
+ "step": 1153,
+ "time": 11.28
+ },
+ {
+ "epoch": 1.11,
+ "learning_rate": "1.9556e-04",
+ "loss": 0.9159,
+ "slid_loss": 0.96,
+ "step": 1154,
+ "time": 13.57
+ },
+ {
+ "epoch": 1.11,
+ "learning_rate": "1.9555e-04",
+ "loss": 0.8557,
+ "slid_loss": 0.9592,
+ "step": 1155,
+ "time": 13.52
+ },
+ {
+ "epoch": 1.11,
+ "learning_rate": "1.9553e-04",
+ "loss": 0.9897,
+ "slid_loss": 0.9594,
+ "step": 1156,
+ "time": 13.73
+ },
+ {
+ "epoch": 1.11,
+ "learning_rate": "1.9552e-04",
+ "loss": 0.9237,
+ "slid_loss": 0.9585,
+ "step": 1157,
+ "time": 12.28
+ },
+ {
+ "epoch": 1.11,
+ "learning_rate": "1.9551e-04",
+ "loss": 1.0264,
+ "slid_loss": 0.9601,
+ "step": 1158,
+ "time": 14.53
+ },
+ {
+ "epoch": 1.11,
+ "learning_rate": "1.9549e-04",
+ "loss": 0.9369,
+ "slid_loss": 0.9601,
+ "step": 1159,
+ "time": 13.57
+ },
+ {
+ "epoch": 1.11,
+ "learning_rate": "1.9548e-04",
+ "loss": 0.8212,
+ "slid_loss": 0.9582,
+ "step": 1160,
+ "time": 11.96
+ },
+ {
+ "epoch": 1.12,
+ "learning_rate": "1.9546e-04",
+ "loss": 1.0195,
+ "slid_loss": 0.9589,
+ "step": 1161,
+ "time": 13.29
+ },
+ {
+ "epoch": 1.12,
+ "learning_rate": "1.9545e-04",
+ "loss": 0.9329,
+ "slid_loss": 0.9593,
+ "step": 1162,
+ "time": 13.68
+ },
+ {
+ "epoch": 1.12,
+ "learning_rate": "1.9544e-04",
+ "loss": 1.0429,
+ "slid_loss": 0.9605,
+ "step": 1163,
+ "time": 11.42
+ },
+ {
+ "epoch": 1.12,
+ "learning_rate": "1.9542e-04",
+ "loss": 0.8298,
+ "slid_loss": 0.9595,
+ "step": 1164,
+ "time": 11.68
+ },
+ {
+ "epoch": 1.12,
+ "learning_rate": "1.9541e-04",
+ "loss": 0.9673,
+ "slid_loss": 0.958,
+ "step": 1165,
+ "time": 11.66
+ },
+ {
+ "epoch": 1.12,
+ "learning_rate": "1.9539e-04",
+ "loss": 1.0574,
+ "slid_loss": 0.959,
+ "step": 1166,
+ "time": 13.56
+ },
+ {
+ "epoch": 1.12,
+ "learning_rate": "1.9538e-04",
+ "loss": 0.9713,
+ "slid_loss": 0.9581,
+ "step": 1167,
+ "time": 13.36
+ },
+ {
+ "epoch": 1.12,
+ "learning_rate": "1.9537e-04",
+ "loss": 0.837,
+ "slid_loss": 0.9564,
+ "step": 1168,
+ "time": 11.18
+ },
+ {
+ "epoch": 1.12,
+ "learning_rate": "1.9535e-04",
+ "loss": 0.9485,
+ "slid_loss": 0.9554,
+ "step": 1169,
+ "time": 13.92
+ },
+ {
+ "epoch": 1.12,
+ "learning_rate": "1.9534e-04",
+ "loss": 1.0021,
+ "slid_loss": 0.9552,
+ "step": 1170,
+ "time": 13.53
+ },
+ {
+ "epoch": 1.12,
+ "learning_rate": "1.9532e-04",
+ "loss": 0.9,
+ "slid_loss": 0.9542,
+ "step": 1171,
+ "time": 13.61
+ },
+ {
+ "epoch": 1.13,
+ "learning_rate": "1.9531e-04",
+ "loss": 0.9576,
+ "slid_loss": 0.9536,
+ "step": 1172,
+ "time": 13.84
+ },
+ {
+ "epoch": 1.13,
+ "learning_rate": "1.9529e-04",
+ "loss": 0.9676,
+ "slid_loss": 0.9534,
+ "step": 1173,
+ "time": 13.19
+ },
+ {
+ "epoch": 1.13,
+ "learning_rate": "1.9528e-04",
+ "loss": 0.9994,
+ "slid_loss": 0.9537,
+ "step": 1174,
+ "time": 13.22
+ },
+ {
+ "epoch": 1.13,
+ "learning_rate": "1.9527e-04",
+ "loss": 1.002,
+ "slid_loss": 0.9542,
+ "step": 1175,
+ "time": 10.91
+ },
+ {
+ "epoch": 1.13,
+ "learning_rate": "1.9525e-04",
+ "loss": 0.8348,
+ "slid_loss": 0.9534,
+ "step": 1176,
+ "time": 12.43
+ },
+ {
+ "epoch": 1.13,
+ "learning_rate": "1.9524e-04",
+ "loss": 0.9222,
+ "slid_loss": 0.9527,
+ "step": 1177,
+ "time": 13.39
+ },
+ {
+ "epoch": 1.13,
+ "learning_rate": "1.9522e-04",
+ "loss": 0.9225,
+ "slid_loss": 0.9536,
+ "step": 1178,
+ "time": 13.56
+ },
+ {
+ "epoch": 1.13,
+ "learning_rate": "1.9521e-04",
+ "loss": 0.9172,
+ "slid_loss": 0.9537,
+ "step": 1179,
+ "time": 13.7
+ },
+ {
+ "epoch": 1.13,
+ "learning_rate": "1.9519e-04",
+ "loss": 1.0012,
+ "slid_loss": 0.9534,
+ "step": 1180,
+ "time": 13.84
+ },
+ {
+ "epoch": 1.13,
+ "learning_rate": "1.9518e-04",
+ "loss": 0.9266,
+ "slid_loss": 0.9526,
+ "step": 1181,
+ "time": 12.83
+ },
+ {
+ "epoch": 1.14,
+ "learning_rate": "1.9517e-04",
+ "loss": 0.9005,
+ "slid_loss": 0.9514,
+ "step": 1182,
+ "time": 11.23
+ },
+ {
+ "epoch": 1.14,
+ "learning_rate": "1.9515e-04",
+ "loss": 1.0599,
+ "slid_loss": 0.9523,
+ "step": 1183,
+ "time": 13.3
+ },
+ {
+ "epoch": 1.14,
+ "learning_rate": "1.9514e-04",
+ "loss": 0.8789,
+ "slid_loss": 0.951,
+ "step": 1184,
+ "time": 13.23
+ },
+ {
+ "epoch": 1.14,
+ "learning_rate": "1.9512e-04",
+ "loss": 0.9715,
+ "slid_loss": 0.9516,
+ "step": 1185,
+ "time": 11.03
+ },
+ {
+ "epoch": 1.14,
+ "learning_rate": "1.9511e-04",
+ "loss": 0.9176,
+ "slid_loss": 0.9513,
+ "step": 1186,
+ "time": 13.68
+ },
+ {
+ "epoch": 1.14,
+ "learning_rate": "1.9509e-04",
+ "loss": 0.9692,
+ "slid_loss": 0.9517,
+ "step": 1187,
+ "time": 13.73
+ },
+ {
+ "epoch": 1.14,
+ "learning_rate": "1.9508e-04",
+ "loss": 0.8751,
+ "slid_loss": 0.9503,
+ "step": 1188,
+ "time": 12.8
+ },
+ {
+ "epoch": 1.14,
+ "learning_rate": "1.9507e-04",
+ "loss": 0.8907,
+ "slid_loss": 0.9505,
+ "step": 1189,
+ "time": 12.04
+ },
+ {
+ "epoch": 1.14,
+ "learning_rate": "1.9505e-04",
+ "loss": 0.9373,
+ "slid_loss": 0.9497,
+ "step": 1190,
+ "time": 13.78
+ },
+ {
+ "epoch": 1.14,
+ "learning_rate": "1.9504e-04",
+ "loss": 1.0075,
+ "slid_loss": 0.9496,
+ "step": 1191,
+ "time": 12.29
+ },
+ {
+ "epoch": 1.15,
+ "learning_rate": "1.9502e-04",
+ "loss": 0.9801,
+ "slid_loss": 0.9501,
+ "step": 1192,
+ "time": 13.18
+ },
+ {
+ "epoch": 1.15,
+ "learning_rate": "1.9501e-04",
+ "loss": 0.9439,
+ "slid_loss": 0.95,
+ "step": 1193,
+ "time": 13.8
+ },
+ {
+ "epoch": 1.15,
+ "learning_rate": "1.9499e-04",
+ "loss": 0.7925,
+ "slid_loss": 0.9487,
+ "step": 1194,
+ "time": 13.76
+ },
+ {
+ "epoch": 1.15,
+ "learning_rate": "1.9498e-04",
+ "loss": 0.9536,
+ "slid_loss": 0.9491,
+ "step": 1195,
+ "time": 14.09
+ },
+ {
+ "epoch": 1.15,
+ "learning_rate": "1.9496e-04",
+ "loss": 0.8415,
+ "slid_loss": 0.9477,
+ "step": 1196,
+ "time": 13.81
+ },
+ {
+ "epoch": 1.15,
+ "learning_rate": "1.9495e-04",
+ "loss": 1.001,
+ "slid_loss": 0.9489,
+ "step": 1197,
+ "time": 13.26
+ },
+ {
+ "epoch": 1.15,
+ "learning_rate": "1.9493e-04",
+ "loss": 0.9278,
+ "slid_loss": 0.9492,
+ "step": 1198,
+ "time": 14.02
+ },
+ {
+ "epoch": 1.15,
+ "learning_rate": "1.9492e-04",
+ "loss": 0.967,
+ "slid_loss": 0.9504,
+ "step": 1199,
+ "time": 13.37
+ },
+ {
+ "epoch": 1.15,
+ "learning_rate": "1.9490e-04",
+ "loss": 0.8811,
+ "slid_loss": 0.9505,
+ "step": 1200,
+ "time": 13.49
+ },
+ {
+ "epoch": 1.15,
+ "learning_rate": "1.9489e-04",
+ "loss": 1.0216,
+ "slid_loss": 0.9518,
+ "step": 1201,
+ "time": 13.41
+ },
+ {
+ "epoch": 1.15,
+ "learning_rate": "1.9487e-04",
+ "loss": 0.9399,
+ "slid_loss": 0.9522,
+ "step": 1202,
+ "time": 13.81
+ },
+ {
+ "epoch": 1.16,
+ "learning_rate": "1.9486e-04",
+ "loss": 0.9094,
+ "slid_loss": 0.9525,
+ "step": 1203,
+ "time": 13.37
+ },
+ {
+ "epoch": 1.16,
+ "learning_rate": "1.9484e-04",
+ "loss": 0.9303,
+ "slid_loss": 0.9522,
+ "step": 1204,
+ "time": 13.23
+ },
+ {
+ "epoch": 1.16,
+ "learning_rate": "1.9483e-04",
+ "loss": 0.9021,
+ "slid_loss": 0.9508,
+ "step": 1205,
+ "time": 13.03
+ },
+ {
+ "epoch": 1.16,
+ "learning_rate": "1.9482e-04",
+ "loss": 0.9068,
+ "slid_loss": 0.9495,
+ "step": 1206,
+ "time": 13.57
+ },
+ {
+ "epoch": 1.16,
+ "learning_rate": "1.9480e-04",
+ "loss": 0.9538,
+ "slid_loss": 0.9484,
+ "step": 1207,
+ "time": 12.06
+ },
+ {
+ "epoch": 1.16,
+ "learning_rate": "1.9479e-04",
+ "loss": 1.0151,
+ "slid_loss": 0.95,
+ "step": 1208,
+ "time": 13.38
+ },
+ {
+ "epoch": 1.16,
+ "learning_rate": "1.9477e-04",
+ "loss": 0.8581,
+ "slid_loss": 0.9488,
+ "step": 1209,
+ "time": 13.9
+ },
+ {
+ "epoch": 1.16,
+ "learning_rate": "1.9476e-04",
+ "loss": 0.9223,
+ "slid_loss": 0.9485,
+ "step": 1210,
+ "time": 12.19
+ },
+ {
+ "epoch": 1.16,
+ "learning_rate": "1.9474e-04",
+ "loss": 0.8783,
+ "slid_loss": 0.9481,
+ "step": 1211,
+ "time": 12.29
+ },
+ {
+ "epoch": 1.16,
+ "learning_rate": "1.9473e-04",
+ "loss": 0.9753,
+ "slid_loss": 0.9475,
+ "step": 1212,
+ "time": 13.4
+ },
+ {
+ "epoch": 1.17,
+ "learning_rate": "1.9471e-04",
+ "loss": 0.9114,
+ "slid_loss": 0.9473,
+ "step": 1213,
+ "time": 14.06
+ },
+ {
+ "epoch": 1.17,
+ "learning_rate": "1.9470e-04",
+ "loss": 0.9717,
+ "slid_loss": 0.9476,
+ "step": 1214,
+ "time": 13.87
+ },
+ {
+ "epoch": 1.17,
+ "learning_rate": "1.9468e-04",
+ "loss": 0.8711,
+ "slid_loss": 0.9457,
+ "step": 1215,
+ "time": 13.3
+ },
+ {
+ "epoch": 1.17,
+ "learning_rate": "1.9467e-04",
+ "loss": 0.9072,
+ "slid_loss": 0.9451,
+ "step": 1216,
+ "time": 13.39
+ },
+ {
+ "epoch": 1.17,
+ "learning_rate": "1.9465e-04",
+ "loss": 0.8867,
+ "slid_loss": 0.9449,
+ "step": 1217,
+ "time": 12.99
+ },
+ {
+ "epoch": 1.17,
+ "learning_rate": "1.9464e-04",
+ "loss": 1.0917,
+ "slid_loss": 0.9463,
+ "step": 1218,
+ "time": 12.91
+ },
+ {
+ "epoch": 1.17,
+ "learning_rate": "1.9462e-04",
+ "loss": 0.9549,
+ "slid_loss": 0.9465,
+ "step": 1219,
+ "time": 13.33
+ },
+ {
+ "epoch": 1.17,
+ "learning_rate": "1.9461e-04",
+ "loss": 0.9797,
+ "slid_loss": 0.9465,
+ "step": 1220,
+ "time": 11.99
+ },
+ {
+ "epoch": 1.17,
+ "learning_rate": "1.9459e-04",
+ "loss": 0.8945,
+ "slid_loss": 0.9465,
+ "step": 1221,
+ "time": 12.6
+ },
+ {
+ "epoch": 1.17,
+ "learning_rate": "1.9457e-04",
+ "loss": 1.0062,
+ "slid_loss": 0.948,
+ "step": 1222,
+ "time": 13.24
+ },
+ {
+ "epoch": 1.17,
+ "learning_rate": "1.9456e-04",
+ "loss": 0.9828,
+ "slid_loss": 0.9474,
+ "step": 1223,
+ "time": 12.81
+ },
+ {
+ "epoch": 1.18,
+ "learning_rate": "1.9454e-04",
+ "loss": 0.9117,
+ "slid_loss": 0.9463,
+ "step": 1224,
+ "time": 12.93
+ },
+ {
+ "epoch": 1.18,
+ "learning_rate": "1.9453e-04",
+ "loss": 0.9372,
+ "slid_loss": 0.9457,
+ "step": 1225,
+ "time": 12.33
+ },
+ {
+ "epoch": 1.18,
+ "learning_rate": "1.9451e-04",
+ "loss": 0.8593,
+ "slid_loss": 0.9435,
+ "step": 1226,
+ "time": 12.15
+ },
+ {
+ "epoch": 1.18,
+ "learning_rate": "1.9450e-04",
+ "loss": 0.8272,
+ "slid_loss": 0.9414,
+ "step": 1227,
+ "time": 13.16
+ },
+ {
+ "epoch": 1.18,
+ "learning_rate": "1.9448e-04",
+ "loss": 0.9126,
+ "slid_loss": 0.9421,
+ "step": 1228,
+ "time": 13.34
+ },
+ {
+ "epoch": 1.18,
+ "learning_rate": "1.9447e-04",
+ "loss": 0.9673,
+ "slid_loss": 0.941,
+ "step": 1229,
+ "time": 13.63
+ },
+ {
+ "epoch": 1.18,
+ "learning_rate": "1.9445e-04",
+ "loss": 0.9833,
+ "slid_loss": 0.941,
+ "step": 1230,
+ "time": 12.94
+ },
+ {
+ "epoch": 1.18,
+ "learning_rate": "1.9444e-04",
+ "loss": 0.91,
+ "slid_loss": 0.9403,
+ "step": 1231,
+ "time": 13.81
+ },
+ {
+ "epoch": 1.18,
+ "learning_rate": "1.9442e-04",
+ "loss": 0.8445,
+ "slid_loss": 0.939,
+ "step": 1232,
+ "time": 12.5
+ },
+ {
+ "epoch": 1.18,
+ "learning_rate": "1.9441e-04",
+ "loss": 0.9207,
+ "slid_loss": 0.9381,
+ "step": 1233,
+ "time": 13.67
+ },
+ {
+ "epoch": 1.19,
+ "learning_rate": "1.9439e-04",
+ "loss": 0.9945,
+ "slid_loss": 0.9381,
+ "step": 1234,
+ "time": 13.47
+ },
+ {
+ "epoch": 1.19,
+ "learning_rate": "1.9438e-04",
+ "loss": 0.929,
+ "slid_loss": 0.9375,
+ "step": 1235,
+ "time": 13.48
+ },
+ {
+ "epoch": 1.19,
+ "learning_rate": "1.9436e-04",
+ "loss": 1.0489,
+ "slid_loss": 0.939,
+ "step": 1236,
+ "time": 12.77
+ },
+ {
+ "epoch": 1.19,
+ "learning_rate": "1.9434e-04",
+ "loss": 0.8969,
+ "slid_loss": 0.9376,
+ "step": 1237,
+ "time": 13.3
+ },
+ {
+ "epoch": 1.19,
+ "learning_rate": "1.9433e-04",
+ "loss": 0.8192,
+ "slid_loss": 0.9358,
+ "step": 1238,
+ "time": 12.33
+ },
+ {
+ "epoch": 1.19,
+ "learning_rate": "1.9431e-04",
+ "loss": 0.9099,
+ "slid_loss": 0.9359,
+ "step": 1239,
+ "time": 13.9
+ },
+ {
+ "epoch": 1.19,
+ "learning_rate": "1.9430e-04",
+ "loss": 0.8734,
+ "slid_loss": 0.9355,
+ "step": 1240,
+ "time": 13.06
+ },
+ {
+ "epoch": 1.19,
+ "learning_rate": "1.9428e-04",
+ "loss": 1.0521,
+ "slid_loss": 0.9368,
+ "step": 1241,
+ "time": 13.76
+ },
+ {
+ "epoch": 1.19,
+ "learning_rate": "1.9427e-04",
+ "loss": 0.9104,
+ "slid_loss": 0.9366,
+ "step": 1242,
+ "time": 12.37
+ },
+ {
+ "epoch": 1.19,
+ "learning_rate": "1.9425e-04",
+ "loss": 0.9726,
+ "slid_loss": 0.9369,
+ "step": 1243,
+ "time": 12.34
+ },
+ {
+ "epoch": 1.2,
+ "learning_rate": "1.9424e-04",
+ "loss": 0.9849,
+ "slid_loss": 0.9371,
+ "step": 1244,
+ "time": 13.33
+ },
+ {
+ "epoch": 1.2,
+ "learning_rate": "1.9422e-04",
+ "loss": 0.969,
+ "slid_loss": 0.9368,
+ "step": 1245,
+ "time": 13.84
+ },
+ {
+ "epoch": 1.2,
+ "learning_rate": "1.9420e-04",
+ "loss": 0.9812,
+ "slid_loss": 0.9377,
+ "step": 1246,
+ "time": 12.12
+ },
+ {
+ "epoch": 1.2,
+ "learning_rate": "1.9419e-04",
+ "loss": 0.9618,
+ "slid_loss": 0.9385,
+ "step": 1247,
+ "time": 13.37
+ },
+ {
+ "epoch": 1.2,
+ "learning_rate": "1.9417e-04",
+ "loss": 1.0197,
+ "slid_loss": 0.9387,
+ "step": 1248,
+ "time": 12.94
+ },
+ {
+ "epoch": 1.2,
+ "learning_rate": "1.9416e-04",
+ "loss": 0.922,
+ "slid_loss": 0.9396,
+ "step": 1249,
+ "time": 13.67
+ },
+ {
+ "epoch": 1.2,
+ "learning_rate": "1.9414e-04",
+ "loss": 0.9085,
+ "slid_loss": 0.938,
+ "step": 1250,
+ "time": 13.54
+ },
+ {
+ "epoch": 1.2,
+ "learning_rate": "1.9413e-04",
+ "loss": 0.9196,
+ "slid_loss": 0.9383,
+ "step": 1251,
+ "time": 12.69
+ },
+ {
+ "epoch": 1.2,
+ "learning_rate": "1.9411e-04",
+ "loss": 0.9158,
+ "slid_loss": 0.9382,
+ "step": 1252,
+ "time": 14.82
+ },
+ {
+ "epoch": 1.2,
+ "learning_rate": "1.9409e-04",
+ "loss": 0.8973,
+ "slid_loss": 0.9376,
+ "step": 1253,
+ "time": 13.16
+ },
+ {
+ "epoch": 1.2,
+ "learning_rate": "1.9408e-04",
+ "loss": 1.0365,
+ "slid_loss": 0.9388,
+ "step": 1254,
+ "time": 13.36
+ },
+ {
+ "epoch": 1.21,
+ "learning_rate": "1.9406e-04",
+ "loss": 0.9074,
+ "slid_loss": 0.9393,
+ "step": 1255,
+ "time": 13.29
+ },
+ {
+ "epoch": 1.21,
+ "learning_rate": "1.9405e-04",
+ "loss": 0.9828,
+ "slid_loss": 0.9392,
+ "step": 1256,
+ "time": 13.41
+ },
+ {
+ "epoch": 1.21,
+ "learning_rate": "1.9403e-04",
+ "loss": 0.8463,
+ "slid_loss": 0.9385,
+ "step": 1257,
+ "time": 13.27
+ },
+ {
+ "epoch": 1.21,
+ "learning_rate": "1.9401e-04",
+ "loss": 0.9472,
+ "slid_loss": 0.9377,
+ "step": 1258,
+ "time": 12.79
+ },
+ {
+ "epoch": 1.21,
+ "learning_rate": "1.9400e-04",
+ "loss": 0.8637,
+ "slid_loss": 0.9369,
+ "step": 1259,
+ "time": 11.97
+ },
+ {
+ "epoch": 1.21,
+ "learning_rate": "1.9398e-04",
+ "loss": 0.9316,
+ "slid_loss": 0.938,
+ "step": 1260,
+ "time": 13.49
+ },
+ {
+ "epoch": 1.21,
+ "learning_rate": "1.9397e-04",
+ "loss": 0.9107,
+ "slid_loss": 0.9369,
+ "step": 1261,
+ "time": 13.72
+ },
+ {
+ "epoch": 1.21,
+ "learning_rate": "1.9395e-04",
+ "loss": 1.0101,
+ "slid_loss": 0.9377,
+ "step": 1262,
+ "time": 14.05
+ },
+ {
+ "epoch": 1.21,
+ "learning_rate": "1.9393e-04",
+ "loss": 0.8851,
+ "slid_loss": 0.9361,
+ "step": 1263,
+ "time": 12.93
+ },
+ {
+ "epoch": 1.21,
+ "learning_rate": "1.9392e-04",
+ "loss": 0.9462,
+ "slid_loss": 0.9373,
+ "step": 1264,
+ "time": 13.76
+ },
+ {
+ "epoch": 1.22,
+ "learning_rate": "1.9390e-04",
+ "loss": 0.8597,
+ "slid_loss": 0.9362,
+ "step": 1265,
+ "time": 12.92
+ },
+ {
+ "epoch": 1.22,
+ "learning_rate": "1.9389e-04",
+ "loss": 0.9908,
+ "slid_loss": 0.9356,
+ "step": 1266,
+ "time": 13.31
+ },
+ {
+ "epoch": 1.22,
+ "learning_rate": "1.9387e-04",
+ "loss": 0.9866,
+ "slid_loss": 0.9357,
+ "step": 1267,
+ "time": 12.83
+ },
+ {
+ "epoch": 1.22,
+ "learning_rate": "1.9385e-04",
+ "loss": 1.0546,
+ "slid_loss": 0.9379,
+ "step": 1268,
+ "time": 13.26
+ },
+ {
+ "epoch": 1.22,
+ "learning_rate": "1.9384e-04",
+ "loss": 0.9093,
+ "slid_loss": 0.9375,
+ "step": 1269,
+ "time": 13.78
+ },
+ {
+ "epoch": 1.22,
+ "learning_rate": "1.9382e-04",
+ "loss": 0.927,
+ "slid_loss": 0.9367,
+ "step": 1270,
+ "time": 12.66
+ },
+ {
+ "epoch": 1.22,
+ "learning_rate": "1.9381e-04",
+ "loss": 0.9112,
+ "slid_loss": 0.9369,
+ "step": 1271,
+ "time": 14.14
+ },
+ {
+ "epoch": 1.22,
+ "learning_rate": "1.9379e-04",
+ "loss": 0.9583,
+ "slid_loss": 0.9369,
+ "step": 1272,
+ "time": 12.93
+ },
+ {
+ "epoch": 1.22,
+ "learning_rate": "1.9377e-04",
+ "loss": 0.8529,
+ "slid_loss": 0.9357,
+ "step": 1273,
+ "time": 11.22
+ },
+ {
+ "epoch": 1.22,
+ "learning_rate": "1.9376e-04",
+ "loss": 0.8961,
+ "slid_loss": 0.9347,
+ "step": 1274,
+ "time": 13.82
+ },
+ {
+ "epoch": 1.22,
+ "learning_rate": "1.9374e-04",
+ "loss": 0.9146,
+ "slid_loss": 0.9338,
+ "step": 1275,
+ "time": 12.86
+ },
+ {
+ "epoch": 1.23,
+ "learning_rate": "1.9373e-04",
+ "loss": 0.7996,
+ "slid_loss": 0.9335,
+ "step": 1276,
+ "time": 13.2
+ },
+ {
+ "epoch": 1.23,
+ "learning_rate": "1.9371e-04",
+ "loss": 0.91,
+ "slid_loss": 0.9333,
+ "step": 1277,
+ "time": 11.2
+ },
+ {
+ "epoch": 1.23,
+ "learning_rate": "1.9369e-04",
+ "loss": 0.9831,
+ "slid_loss": 0.9339,
+ "step": 1278,
+ "time": 13.9
+ },
+ {
+ "epoch": 1.23,
+ "learning_rate": "1.9368e-04",
+ "loss": 0.8653,
+ "slid_loss": 0.9334,
+ "step": 1279,
+ "time": 13.81
+ },
+ {
+ "epoch": 1.23,
+ "learning_rate": "1.9366e-04",
+ "loss": 0.9086,
+ "slid_loss": 0.9325,
+ "step": 1280,
+ "time": 12.87
+ },
+ {
+ "epoch": 1.23,
+ "learning_rate": "1.9364e-04",
+ "loss": 0.9261,
+ "slid_loss": 0.9325,
+ "step": 1281,
+ "time": 13.7
+ },
+ {
+ "epoch": 1.23,
+ "learning_rate": "1.9363e-04",
+ "loss": 0.8862,
+ "slid_loss": 0.9323,
+ "step": 1282,
+ "time": 12.51
+ },
+ {
+ "epoch": 1.23,
+ "learning_rate": "1.9361e-04",
+ "loss": 0.9772,
+ "slid_loss": 0.9315,
+ "step": 1283,
+ "time": 12.96
+ },
+ {
+ "epoch": 1.23,
+ "learning_rate": "1.9359e-04",
+ "loss": 0.9081,
+ "slid_loss": 0.9318,
+ "step": 1284,
+ "time": 13.25
+ },
+ {
+ "epoch": 1.23,
+ "learning_rate": "1.9358e-04",
+ "loss": 0.8749,
+ "slid_loss": 0.9308,
+ "step": 1285,
+ "time": 14.41
+ },
+ {
+ "epoch": 1.24,
+ "learning_rate": "1.9356e-04",
+ "loss": 0.8213,
+ "slid_loss": 0.9299,
+ "step": 1286,
+ "time": 13.44
+ },
+ {
+ "epoch": 1.24,
+ "learning_rate": "1.9355e-04",
+ "loss": 0.9318,
+ "slid_loss": 0.9295,
+ "step": 1287,
+ "time": 11.23
+ },
+ {
+ "epoch": 1.24,
+ "learning_rate": "1.9353e-04",
+ "loss": 1.0045,
+ "slid_loss": 0.9308,
+ "step": 1288,
+ "time": 13.33
+ },
+ {
+ "epoch": 1.24,
+ "learning_rate": "1.9351e-04",
+ "loss": 0.9516,
+ "slid_loss": 0.9314,
+ "step": 1289,
+ "time": 14.13
+ },
+ {
+ "epoch": 1.24,
+ "learning_rate": "1.9350e-04",
+ "loss": 0.9447,
+ "slid_loss": 0.9315,
+ "step": 1290,
+ "time": 13.4
+ },
+ {
+ "epoch": 1.24,
+ "learning_rate": "1.9348e-04",
+ "loss": 0.9711,
+ "slid_loss": 0.9311,
+ "step": 1291,
+ "time": 11.32
+ },
+ {
+ "epoch": 1.24,
+ "learning_rate": "1.9346e-04",
+ "loss": 0.9436,
+ "slid_loss": 0.9308,
+ "step": 1292,
+ "time": 13.95
+ },
+ {
+ "epoch": 1.24,
+ "learning_rate": "1.9345e-04",
+ "loss": 0.9589,
+ "slid_loss": 0.9309,
+ "step": 1293,
+ "time": 13.25
+ },
+ {
+ "epoch": 1.24,
+ "learning_rate": "1.9343e-04",
+ "loss": 0.8479,
+ "slid_loss": 0.9315,
+ "step": 1294,
+ "time": 12.98
+ },
+ {
+ "epoch": 1.24,
+ "learning_rate": "1.9341e-04",
+ "loss": 0.9897,
+ "slid_loss": 0.9318,
+ "step": 1295,
+ "time": 11.22
+ },
+ {
+ "epoch": 1.24,
+ "learning_rate": "1.9340e-04",
+ "loss": 0.9148,
+ "slid_loss": 0.9326,
+ "step": 1296,
+ "time": 11.82
+ },
+ {
+ "epoch": 1.25,
+ "learning_rate": "1.9338e-04",
+ "loss": 1.0235,
+ "slid_loss": 0.9328,
+ "step": 1297,
+ "time": 13.63
+ },
+ {
+ "epoch": 1.25,
+ "learning_rate": "1.9336e-04",
+ "loss": 0.9835,
+ "slid_loss": 0.9333,
+ "step": 1298,
+ "time": 11.66
+ },
+ {
+ "epoch": 1.25,
+ "learning_rate": "1.9335e-04",
+ "loss": 0.9062,
+ "slid_loss": 0.9327,
+ "step": 1299,
+ "time": 12.05
+ },
+ {
+ "epoch": 1.25,
+ "learning_rate": "1.9333e-04",
+ "loss": 0.9723,
+ "slid_loss": 0.9336,
+ "step": 1300,
+ "time": 13.91
+ },
+ {
+ "epoch": 1.25,
+ "learning_rate": "1.9331e-04",
+ "loss": 0.9532,
+ "slid_loss": 0.933,
+ "step": 1301,
+ "time": 11.9
+ },
+ {
+ "epoch": 1.25,
+ "learning_rate": "1.9330e-04",
+ "loss": 0.9004,
+ "slid_loss": 0.9326,
+ "step": 1302,
+ "time": 13.8
+ },
+ {
+ "epoch": 1.25,
+ "learning_rate": "1.9328e-04",
+ "loss": 0.9414,
+ "slid_loss": 0.9329,
+ "step": 1303,
+ "time": 12.35
+ },
+ {
+ "epoch": 1.25,
+ "learning_rate": "1.9326e-04",
+ "loss": 0.9436,
+ "slid_loss": 0.933,
+ "step": 1304,
+ "time": 13.5
+ },
+ {
+ "epoch": 1.25,
+ "learning_rate": "1.9325e-04",
+ "loss": 0.8238,
+ "slid_loss": 0.9322,
+ "step": 1305,
+ "time": 11.37
+ },
+ {
+ "epoch": 1.25,
+ "learning_rate": "1.9323e-04",
+ "loss": 1.0016,
+ "slid_loss": 0.9332,
+ "step": 1306,
+ "time": 12.64
+ },
+ {
+ "epoch": 1.26,
+ "learning_rate": "1.9321e-04",
+ "loss": 0.9085,
+ "slid_loss": 0.9327,
+ "step": 1307,
+ "time": 13.22
+ },
+ {
+ "epoch": 1.26,
+ "learning_rate": "1.9319e-04",
+ "loss": 0.9078,
+ "slid_loss": 0.9317,
+ "step": 1308,
+ "time": 13.01
+ },
+ {
+ "epoch": 1.26,
+ "learning_rate": "1.9318e-04",
+ "loss": 0.9309,
+ "slid_loss": 0.9324,
+ "step": 1309,
+ "time": 13.63
+ },
+ {
+ "epoch": 1.26,
+ "learning_rate": "1.9316e-04",
+ "loss": 0.891,
+ "slid_loss": 0.9321,
+ "step": 1310,
+ "time": 13.21
+ },
+ {
+ "epoch": 1.26,
+ "learning_rate": "1.9314e-04",
+ "loss": 0.9598,
+ "slid_loss": 0.9329,
+ "step": 1311,
+ "time": 14.35
+ },
+ {
+ "epoch": 1.26,
+ "learning_rate": "1.9313e-04",
+ "loss": 0.9336,
+ "slid_loss": 0.9325,
+ "step": 1312,
+ "time": 12.97
+ },
+ {
+ "epoch": 1.26,
+ "learning_rate": "1.9311e-04",
+ "loss": 0.9445,
+ "slid_loss": 0.9328,
+ "step": 1313,
+ "time": 13.43
+ },
+ {
+ "epoch": 1.26,
+ "learning_rate": "1.9309e-04",
+ "loss": 0.8321,
+ "slid_loss": 0.9314,
+ "step": 1314,
+ "time": 12.81
+ },
+ {
+ "epoch": 1.26,
+ "learning_rate": "1.9308e-04",
+ "loss": 0.9243,
+ "slid_loss": 0.9319,
+ "step": 1315,
+ "time": 10.95
+ },
+ {
+ "epoch": 1.26,
+ "learning_rate": "1.9306e-04",
+ "loss": 0.9683,
+ "slid_loss": 0.9325,
+ "step": 1316,
+ "time": 12.56
+ },
+ {
+ "epoch": 1.27,
+ "learning_rate": "1.9304e-04",
+ "loss": 0.9784,
+ "slid_loss": 0.9335,
+ "step": 1317,
+ "time": 13.67
+ },
+ {
+ "epoch": 1.27,
+ "learning_rate": "1.9302e-04",
+ "loss": 0.9859,
+ "slid_loss": 0.9324,
+ "step": 1318,
+ "time": 13.12
+ },
+ {
+ "epoch": 1.27,
+ "learning_rate": "1.9301e-04",
+ "loss": 0.9041,
+ "slid_loss": 0.9319,
+ "step": 1319,
+ "time": 11.59
+ },
+ {
+ "epoch": 1.27,
+ "learning_rate": "1.9299e-04",
+ "loss": 0.9599,
+ "slid_loss": 0.9317,
+ "step": 1320,
+ "time": 13.67
+ },
+ {
+ "epoch": 1.27,
+ "learning_rate": "1.9297e-04",
+ "loss": 0.9074,
+ "slid_loss": 0.9318,
+ "step": 1321,
+ "time": 13.88
+ },
+ {
+ "epoch": 1.27,
+ "learning_rate": "1.9296e-04",
+ "loss": 0.9607,
+ "slid_loss": 0.9314,
+ "step": 1322,
+ "time": 13.21
+ },
+ {
+ "epoch": 1.27,
+ "learning_rate": "1.9294e-04",
+ "loss": 0.8908,
+ "slid_loss": 0.9305,
+ "step": 1323,
+ "time": 13.28
+ },
+ {
+ "epoch": 1.27,
+ "learning_rate": "1.9292e-04",
+ "loss": 1.0095,
+ "slid_loss": 0.9314,
+ "step": 1324,
+ "time": 12.29
+ },
+ {
+ "epoch": 1.27,
+ "learning_rate": "1.9290e-04",
+ "loss": 0.9919,
+ "slid_loss": 0.932,
+ "step": 1325,
+ "time": 12.97
+ },
+ {
+ "epoch": 1.27,
+ "learning_rate": "1.9289e-04",
+ "loss": 0.9379,
+ "slid_loss": 0.9328,
+ "step": 1326,
+ "time": 12.77
+ },
+ {
+ "epoch": 1.27,
+ "learning_rate": "1.9287e-04",
+ "loss": 0.919,
+ "slid_loss": 0.9337,
+ "step": 1327,
+ "time": 13.89
+ },
+ {
+ "epoch": 1.28,
+ "learning_rate": "1.9285e-04",
+ "loss": 0.8798,
+ "slid_loss": 0.9334,
+ "step": 1328,
+ "time": 11.31
+ },
+ {
+ "epoch": 1.28,
+ "learning_rate": "1.9284e-04",
+ "loss": 0.8813,
+ "slid_loss": 0.9325,
+ "step": 1329,
+ "time": 13.22
+ },
+ {
+ "epoch": 1.28,
+ "learning_rate": "1.9282e-04",
+ "loss": 0.8782,
+ "slid_loss": 0.9314,
+ "step": 1330,
+ "time": 12.82
+ },
+ {
+ "epoch": 1.28,
+ "learning_rate": "1.9280e-04",
+ "loss": 0.8523,
+ "slid_loss": 0.9309,
+ "step": 1331,
+ "time": 11.85
+ },
+ {
+ "epoch": 1.28,
+ "learning_rate": "1.9278e-04",
+ "loss": 0.8792,
+ "slid_loss": 0.9312,
+ "step": 1332,
+ "time": 13.49
+ },
+ {
+ "epoch": 1.28,
+ "learning_rate": "1.9277e-04",
+ "loss": 0.8811,
+ "slid_loss": 0.9308,
+ "step": 1333,
+ "time": 12.92
+ },
+ {
+ "epoch": 1.28,
+ "learning_rate": "1.9275e-04",
+ "loss": 0.9351,
+ "slid_loss": 0.9302,
+ "step": 1334,
+ "time": 11.72
+ },
+ {
+ "epoch": 1.28,
+ "learning_rate": "1.9273e-04",
+ "loss": 0.7796,
+ "slid_loss": 0.9287,
+ "step": 1335,
+ "time": 12.32
+ },
+ {
+ "epoch": 1.28,
+ "learning_rate": "1.9271e-04",
+ "loss": 0.9233,
+ "slid_loss": 0.9275,
+ "step": 1336,
+ "time": 13.73
+ },
+ {
+ "epoch": 1.28,
+ "learning_rate": "1.9270e-04",
+ "loss": 0.9651,
+ "slid_loss": 0.9282,
+ "step": 1337,
+ "time": 12.86
+ },
+ {
+ "epoch": 1.29,
+ "learning_rate": "1.9268e-04",
+ "loss": 0.9624,
+ "slid_loss": 0.9296,
+ "step": 1338,
+ "time": 13.34
+ },
+ {
+ "epoch": 1.29,
+ "learning_rate": "1.9266e-04",
+ "loss": 0.9484,
+ "slid_loss": 0.93,
+ "step": 1339,
+ "time": 12.46
+ },
+ {
+ "epoch": 1.29,
+ "learning_rate": "1.9264e-04",
+ "loss": 1.039,
+ "slid_loss": 0.9316,
+ "step": 1340,
+ "time": 11.74
+ },
+ {
+ "epoch": 1.29,
+ "learning_rate": "1.9263e-04",
+ "loss": 1.0187,
+ "slid_loss": 0.9313,
+ "step": 1341,
+ "time": 11.79
+ },
+ {
+ "epoch": 1.29,
+ "learning_rate": "1.9261e-04",
+ "loss": 0.998,
+ "slid_loss": 0.9322,
+ "step": 1342,
+ "time": 13.88
+ },
+ {
+ "epoch": 1.29,
+ "learning_rate": "1.9259e-04",
+ "loss": 1.1592,
+ "slid_loss": 0.934,
+ "step": 1343,
+ "time": 12.98
+ },
+ {
+ "epoch": 1.29,
+ "learning_rate": "1.9257e-04",
+ "loss": 0.9156,
+ "slid_loss": 0.9333,
+ "step": 1344,
+ "time": 13.57
+ },
+ {
+ "epoch": 1.29,
+ "learning_rate": "1.9256e-04",
+ "loss": 0.8495,
+ "slid_loss": 0.9321,
+ "step": 1345,
+ "time": 11.52
+ },
+ {
+ "epoch": 1.29,
+ "learning_rate": "1.9254e-04",
+ "loss": 0.9682,
+ "slid_loss": 0.932,
+ "step": 1346,
+ "time": 12.93
+ },
+ {
+ "epoch": 1.29,
+ "learning_rate": "1.9252e-04",
+ "loss": 0.9507,
+ "slid_loss": 0.9319,
+ "step": 1347,
+ "time": 13.44
+ },
+ {
+ "epoch": 1.29,
+ "learning_rate": "1.9250e-04",
+ "loss": 0.9336,
+ "slid_loss": 0.931,
+ "step": 1348,
+ "time": 13.15
+ },
+ {
+ "epoch": 1.3,
+ "learning_rate": "1.9249e-04",
+ "loss": 0.8783,
+ "slid_loss": 0.9306,
+ "step": 1349,
+ "time": 13.68
+ },
+ {
+ "epoch": 1.3,
+ "learning_rate": "1.9247e-04",
+ "loss": 0.8815,
+ "slid_loss": 0.9303,
+ "step": 1350,
+ "time": 13.3
+ },
+ {
+ "epoch": 1.3,
+ "learning_rate": "1.9245e-04",
+ "loss": 0.9405,
+ "slid_loss": 0.9305,
+ "step": 1351,
+ "time": 13.17
+ },
+ {
+ "epoch": 1.3,
+ "learning_rate": "1.9243e-04",
+ "loss": 0.9371,
+ "slid_loss": 0.9308,
+ "step": 1352,
+ "time": 14.09
+ },
+ {
+ "epoch": 1.3,
+ "learning_rate": "1.9242e-04",
+ "loss": 0.8862,
+ "slid_loss": 0.9307,
+ "step": 1353,
+ "time": 12.3
+ },
+ {
+ "epoch": 1.3,
+ "learning_rate": "1.9240e-04",
+ "loss": 0.9513,
+ "slid_loss": 0.9298,
+ "step": 1354,
+ "time": 13.6
+ },
+ {
+ "epoch": 1.3,
+ "learning_rate": "1.9238e-04",
+ "loss": 0.9643,
+ "slid_loss": 0.9304,
+ "step": 1355,
+ "time": 12.95
+ },
+ {
+ "epoch": 1.3,
+ "learning_rate": "1.9236e-04",
+ "loss": 1.0462,
+ "slid_loss": 0.931,
+ "step": 1356,
+ "time": 12.47
+ },
+ {
+ "epoch": 1.3,
+ "learning_rate": "1.9234e-04",
+ "loss": 0.8614,
+ "slid_loss": 0.9312,
+ "step": 1357,
+ "time": 13.2
+ },
+ {
+ "epoch": 1.3,
+ "learning_rate": "1.9233e-04",
+ "loss": 0.8833,
+ "slid_loss": 0.9305,
+ "step": 1358,
+ "time": 13.36
+ },
+ {
+ "epoch": 1.31,
+ "learning_rate": "1.9231e-04",
+ "loss": 0.95,
+ "slid_loss": 0.9314,
+ "step": 1359,
+ "time": 12.95
+ },
+ {
+ "epoch": 1.31,
+ "learning_rate": "1.9229e-04",
+ "loss": 0.8565,
+ "slid_loss": 0.9306,
+ "step": 1360,
+ "time": 12.25
+ },
+ {
+ "epoch": 1.31,
+ "learning_rate": "1.9227e-04",
+ "loss": 0.9033,
+ "slid_loss": 0.9306,
+ "step": 1361,
+ "time": 12.2
+ },
+ {
+ "epoch": 1.31,
+ "learning_rate": "1.9225e-04",
+ "loss": 0.952,
+ "slid_loss": 0.93,
+ "step": 1362,
+ "time": 14.01
+ },
+ {
+ "epoch": 1.31,
+ "learning_rate": "1.9224e-04",
+ "loss": 0.978,
+ "slid_loss": 0.9309,
+ "step": 1363,
+ "time": 13.17
+ },
+ {
+ "epoch": 1.31,
+ "learning_rate": "1.9222e-04",
+ "loss": 0.96,
+ "slid_loss": 0.931,
+ "step": 1364,
+ "time": 13.9
+ },
+ {
+ "epoch": 1.31,
+ "learning_rate": "1.9220e-04",
+ "loss": 0.9698,
+ "slid_loss": 0.9321,
+ "step": 1365,
+ "time": 12.95
+ },
+ {
+ "epoch": 1.31,
+ "learning_rate": "1.9218e-04",
+ "loss": 0.9172,
+ "slid_loss": 0.9314,
+ "step": 1366,
+ "time": 13.24
+ },
+ {
+ "epoch": 1.31,
+ "learning_rate": "1.9216e-04",
+ "loss": 0.8797,
+ "slid_loss": 0.9303,
+ "step": 1367,
+ "time": 11.34
+ },
+ {
+ "epoch": 1.31,
+ "learning_rate": "1.9215e-04",
+ "loss": 0.8177,
+ "slid_loss": 0.928,
+ "step": 1368,
+ "time": 13.48
+ },
+ {
+ "epoch": 1.32,
+ "learning_rate": "1.9213e-04",
+ "loss": 0.8938,
+ "slid_loss": 0.9278,
+ "step": 1369,
+ "time": 13.58
+ },
+ {
+ "epoch": 1.32,
+ "learning_rate": "1.9211e-04",
+ "loss": 0.8856,
+ "slid_loss": 0.9274,
+ "step": 1370,
+ "time": 13.41
+ },
+ {
+ "epoch": 1.32,
+ "learning_rate": "1.9209e-04",
+ "loss": 0.893,
+ "slid_loss": 0.9272,
+ "step": 1371,
+ "time": 12.59
+ },
+ {
+ "epoch": 1.32,
+ "learning_rate": "1.9207e-04",
+ "loss": 0.891,
+ "slid_loss": 0.9265,
+ "step": 1372,
+ "time": 13.51
+ },
+ {
+ "epoch": 1.32,
+ "learning_rate": "1.9206e-04",
+ "loss": 0.8974,
+ "slid_loss": 0.927,
+ "step": 1373,
+ "time": 11.92
+ },
+ {
+ "epoch": 1.32,
+ "learning_rate": "1.9204e-04",
+ "loss": 0.7257,
+ "slid_loss": 0.9253,
+ "step": 1374,
+ "time": 13.67
+ },
+ {
+ "epoch": 1.32,
+ "learning_rate": "1.9202e-04",
+ "loss": 0.9647,
+ "slid_loss": 0.9258,
+ "step": 1375,
+ "time": 13.3
+ },
+ {
+ "epoch": 1.32,
+ "learning_rate": "1.9200e-04",
+ "loss": 0.9692,
+ "slid_loss": 0.9275,
+ "step": 1376,
+ "time": 13.85
+ },
+ {
+ "epoch": 1.32,
+ "learning_rate": "1.9198e-04",
+ "loss": 0.905,
+ "slid_loss": 0.9274,
+ "step": 1377,
+ "time": 13.91
+ },
+ {
+ "epoch": 1.32,
+ "learning_rate": "1.9197e-04",
+ "loss": 0.9273,
+ "slid_loss": 0.9269,
+ "step": 1378,
+ "time": 12.93
+ },
+ {
+ "epoch": 1.32,
+ "learning_rate": "1.9195e-04",
+ "loss": 0.859,
+ "slid_loss": 0.9268,
+ "step": 1379,
+ "time": 13.49
+ },
+ {
+ "epoch": 1.33,
+ "learning_rate": "1.9193e-04",
+ "loss": 0.963,
+ "slid_loss": 0.9274,
+ "step": 1380,
+ "time": 13.78
+ },
+ {
+ "epoch": 1.33,
+ "learning_rate": "1.9191e-04",
+ "loss": 0.8228,
+ "slid_loss": 0.9263,
+ "step": 1381,
+ "time": 13.39
+ },
+ {
+ "epoch": 1.33,
+ "learning_rate": "1.9189e-04",
+ "loss": 0.8531,
+ "slid_loss": 0.926,
+ "step": 1382,
+ "time": 11.94
+ },
+ {
+ "epoch": 1.33,
+ "learning_rate": "1.9187e-04",
+ "loss": 1.0377,
+ "slid_loss": 0.9266,
+ "step": 1383,
+ "time": 13.65
+ },
+ {
+ "epoch": 1.33,
+ "learning_rate": "1.9186e-04",
+ "loss": 0.8803,
+ "slid_loss": 0.9263,
+ "step": 1384,
+ "time": 14.56
+ },
+ {
+ "epoch": 1.33,
+ "learning_rate": "1.9184e-04",
+ "loss": 0.9668,
+ "slid_loss": 0.9272,
+ "step": 1385,
+ "time": 13.65
+ },
+ {
+ "epoch": 1.33,
+ "learning_rate": "1.9182e-04",
+ "loss": 0.9109,
+ "slid_loss": 0.9281,
+ "step": 1386,
+ "time": 13.87
+ },
+ {
+ "epoch": 1.33,
+ "learning_rate": "1.9180e-04",
+ "loss": 0.9611,
+ "slid_loss": 0.9284,
+ "step": 1387,
+ "time": 13.5
+ },
+ {
+ "epoch": 1.33,
+ "learning_rate": "1.9178e-04",
+ "loss": 0.952,
+ "slid_loss": 0.9279,
+ "step": 1388,
+ "time": 12.96
+ },
+ {
+ "epoch": 1.33,
+ "learning_rate": "1.9176e-04",
+ "loss": 0.9555,
+ "slid_loss": 0.9279,
+ "step": 1389,
+ "time": 13.43
+ },
+ {
+ "epoch": 1.34,
+ "learning_rate": "1.9175e-04",
+ "loss": 0.9054,
+ "slid_loss": 0.9275,
+ "step": 1390,
+ "time": 13.61
+ },
+ {
+ "epoch": 1.34,
+ "learning_rate": "1.9173e-04",
+ "loss": 0.8712,
+ "slid_loss": 0.9265,
+ "step": 1391,
+ "time": 13.7
+ },
+ {
+ "epoch": 1.34,
+ "learning_rate": "1.9171e-04",
+ "loss": 0.8818,
+ "slid_loss": 0.9259,
+ "step": 1392,
+ "time": 14.27
+ },
+ {
+ "epoch": 1.34,
+ "learning_rate": "1.9169e-04",
+ "loss": 0.9165,
+ "slid_loss": 0.9255,
+ "step": 1393,
+ "time": 13.73
+ },
+ {
+ "epoch": 1.34,
+ "learning_rate": "1.9167e-04",
+ "loss": 0.8316,
+ "slid_loss": 0.9253,
+ "step": 1394,
+ "time": 13.41
+ },
+ {
+ "epoch": 1.34,
+ "learning_rate": "1.9165e-04",
+ "loss": 0.9135,
+ "slid_loss": 0.9246,
+ "step": 1395,
+ "time": 13.16
+ },
+ {
+ "epoch": 1.34,
+ "learning_rate": "1.9163e-04",
+ "loss": 1.0035,
+ "slid_loss": 0.9255,
+ "step": 1396,
+ "time": 13.32
+ },
+ {
+ "epoch": 1.34,
+ "learning_rate": "1.9162e-04",
+ "loss": 0.8899,
+ "slid_loss": 0.9241,
+ "step": 1397,
+ "time": 13.39
+ },
+ {
+ "epoch": 1.34,
+ "learning_rate": "1.9160e-04",
+ "loss": 0.8773,
+ "slid_loss": 0.9231,
+ "step": 1398,
+ "time": 13.5
+ },
+ {
+ "epoch": 1.34,
+ "learning_rate": "1.9158e-04",
+ "loss": 1.0457,
+ "slid_loss": 0.9245,
+ "step": 1399,
+ "time": 12.88
+ },
+ {
+ "epoch": 1.34,
+ "learning_rate": "1.9156e-04",
+ "loss": 0.8457,
+ "slid_loss": 0.9232,
+ "step": 1400,
+ "time": 13.48
+ },
+ {
+ "epoch": 1.35,
+ "learning_rate": "1.9154e-04",
+ "loss": 0.9269,
+ "slid_loss": 0.9229,
+ "step": 1401,
+ "time": 12.76
+ },
+ {
+ "epoch": 1.35,
+ "learning_rate": "1.9152e-04",
+ "loss": 0.9535,
+ "slid_loss": 0.9235,
+ "step": 1402,
+ "time": 13.0
+ },
+ {
+ "epoch": 1.35,
+ "learning_rate": "1.9150e-04",
+ "loss": 0.8583,
+ "slid_loss": 0.9226,
+ "step": 1403,
+ "time": 11.4
+ },
+ {
+ "epoch": 1.35,
+ "learning_rate": "1.9148e-04",
+ "loss": 0.8369,
+ "slid_loss": 0.9216,
+ "step": 1404,
+ "time": 14.11
+ },
+ {
+ "epoch": 1.35,
+ "learning_rate": "1.9147e-04",
+ "loss": 0.8748,
+ "slid_loss": 0.9221,
+ "step": 1405,
+ "time": 12.94
+ },
+ {
+ "epoch": 1.35,
+ "learning_rate": "1.9145e-04",
+ "loss": 0.8523,
+ "slid_loss": 0.9206,
+ "step": 1406,
+ "time": 12.46
+ },
+ {
+ "epoch": 1.35,
+ "learning_rate": "1.9143e-04",
+ "loss": 0.8802,
+ "slid_loss": 0.9203,
+ "step": 1407,
+ "time": 11.21
+ },
+ {
+ "epoch": 1.35,
+ "learning_rate": "1.9141e-04",
+ "loss": 0.9475,
+ "slid_loss": 0.9207,
+ "step": 1408,
+ "time": 13.26
+ },
+ {
+ "epoch": 1.35,
+ "learning_rate": "1.9139e-04",
+ "loss": 0.8981,
+ "slid_loss": 0.9204,
+ "step": 1409,
+ "time": 13.84
+ },
+ {
+ "epoch": 1.35,
+ "learning_rate": "1.9137e-04",
+ "loss": 0.927,
+ "slid_loss": 0.9207,
+ "step": 1410,
+ "time": 13.46
+ },
+ {
+ "epoch": 1.36,
+ "learning_rate": "1.9135e-04",
+ "loss": 0.928,
+ "slid_loss": 0.9204,
+ "step": 1411,
+ "time": 13.07
+ },
+ {
+ "epoch": 1.36,
+ "learning_rate": "1.9133e-04",
+ "loss": 0.8298,
+ "slid_loss": 0.9194,
+ "step": 1412,
+ "time": 13.21
+ },
+ {
+ "epoch": 1.36,
+ "learning_rate": "1.9132e-04",
+ "loss": 1.035,
+ "slid_loss": 0.9203,
+ "step": 1413,
+ "time": 13.55
+ },
+ {
+ "epoch": 1.36,
+ "learning_rate": "1.9130e-04",
+ "loss": 0.9949,
+ "slid_loss": 0.9219,
+ "step": 1414,
+ "time": 13.44
+ },
+ {
+ "epoch": 1.36,
+ "learning_rate": "1.9128e-04",
+ "loss": 0.9171,
+ "slid_loss": 0.9218,
+ "step": 1415,
+ "time": 11.89
+ },
+ {
+ "epoch": 1.36,
+ "learning_rate": "1.9126e-04",
+ "loss": 0.9529,
+ "slid_loss": 0.9217,
+ "step": 1416,
+ "time": 13.42
+ },
+ {
+ "epoch": 1.36,
+ "learning_rate": "1.9124e-04",
+ "loss": 0.8169,
+ "slid_loss": 0.9201,
+ "step": 1417,
+ "time": 12.26
+ },
+ {
+ "epoch": 1.36,
+ "learning_rate": "1.9122e-04",
+ "loss": 0.8369,
+ "slid_loss": 0.9186,
+ "step": 1418,
+ "time": 11.29
+ },
+ {
+ "epoch": 1.36,
+ "learning_rate": "1.9120e-04",
+ "loss": 0.992,
+ "slid_loss": 0.9195,
+ "step": 1419,
+ "time": 11.7
+ },
+ {
+ "epoch": 1.36,
+ "learning_rate": "1.9118e-04",
+ "loss": 0.9001,
+ "slid_loss": 0.9189,
+ "step": 1420,
+ "time": 11.41
+ },
+ {
+ "epoch": 1.37,
+ "learning_rate": "1.9116e-04",
+ "loss": 0.9272,
+ "slid_loss": 0.9191,
+ "step": 1421,
+ "time": 12.84
+ },
+ {
+ "epoch": 1.37,
+ "learning_rate": "1.9114e-04",
+ "loss": 0.9079,
+ "slid_loss": 0.9185,
+ "step": 1422,
+ "time": 14.34
+ },
+ {
+ "epoch": 1.37,
+ "learning_rate": "1.9113e-04",
+ "loss": 0.8037,
+ "slid_loss": 0.9177,
+ "step": 1423,
+ "time": 12.88
+ },
+ {
+ "epoch": 1.37,
+ "learning_rate": "1.9111e-04",
+ "loss": 0.9645,
+ "slid_loss": 0.9172,
+ "step": 1424,
+ "time": 12.84
+ },
+ {
+ "epoch": 1.37,
+ "learning_rate": "1.9109e-04",
+ "loss": 1.004,
+ "slid_loss": 0.9173,
+ "step": 1425,
+ "time": 12.87
+ },
+ {
+ "epoch": 1.37,
+ "learning_rate": "1.9107e-04",
+ "loss": 0.8612,
+ "slid_loss": 0.9166,
+ "step": 1426,
+ "time": 11.88
+ },
+ {
+ "epoch": 1.37,
+ "learning_rate": "1.9105e-04",
+ "loss": 0.9573,
+ "slid_loss": 0.9169,
+ "step": 1427,
+ "time": 11.63
+ },
+ {
+ "epoch": 1.37,
+ "learning_rate": "1.9103e-04",
+ "loss": 0.9261,
+ "slid_loss": 0.9174,
+ "step": 1428,
+ "time": 13.32
+ },
+ {
+ "epoch": 1.37,
+ "learning_rate": "1.9101e-04",
+ "loss": 0.9471,
+ "slid_loss": 0.9181,
+ "step": 1429,
+ "time": 13.02
+ },
+ {
+ "epoch": 1.37,
+ "learning_rate": "1.9099e-04",
+ "loss": 0.8639,
+ "slid_loss": 0.9179,
+ "step": 1430,
+ "time": 13.24
+ },
+ {
+ "epoch": 1.37,
+ "learning_rate": "1.9097e-04",
+ "loss": 0.8136,
+ "slid_loss": 0.9175,
+ "step": 1431,
+ "time": 11.17
+ },
+ {
+ "epoch": 1.38,
+ "learning_rate": "1.9095e-04",
+ "loss": 0.8149,
+ "slid_loss": 0.9169,
+ "step": 1432,
+ "time": 13.22
+ },
+ {
+ "epoch": 1.38,
+ "learning_rate": "1.9093e-04",
+ "loss": 1.0393,
+ "slid_loss": 0.9185,
+ "step": 1433,
+ "time": 11.8
+ },
+ {
+ "epoch": 1.38,
+ "learning_rate": "1.9091e-04",
+ "loss": 0.9198,
+ "slid_loss": 0.9183,
+ "step": 1434,
+ "time": 12.72
+ },
+ {
+ "epoch": 1.38,
+ "learning_rate": "1.9090e-04",
+ "loss": 0.9457,
+ "slid_loss": 0.92,
+ "step": 1435,
+ "time": 11.38
+ },
+ {
+ "epoch": 1.38,
+ "learning_rate": "1.9088e-04",
+ "loss": 0.8744,
+ "slid_loss": 0.9195,
+ "step": 1436,
+ "time": 12.22
+ },
+ {
+ "epoch": 1.38,
+ "learning_rate": "1.9086e-04",
+ "loss": 0.8906,
+ "slid_loss": 0.9188,
+ "step": 1437,
+ "time": 13.76
+ },
+ {
+ "epoch": 1.38,
+ "learning_rate": "1.9084e-04",
+ "loss": 0.8405,
+ "slid_loss": 0.9175,
+ "step": 1438,
+ "time": 14.1
+ },
+ {
+ "epoch": 1.38,
+ "learning_rate": "1.9082e-04",
+ "loss": 0.8777,
+ "slid_loss": 0.9168,
+ "step": 1439,
+ "time": 11.41
+ },
+ {
+ "epoch": 1.38,
+ "learning_rate": "1.9080e-04",
+ "loss": 0.8603,
+ "slid_loss": 0.915,
+ "step": 1440,
+ "time": 12.53
+ },
+ {
+ "epoch": 1.38,
+ "learning_rate": "1.9078e-04",
+ "loss": 0.8282,
+ "slid_loss": 0.9131,
+ "step": 1441,
+ "time": 12.04
+ },
+ {
+ "epoch": 1.39,
+ "learning_rate": "1.9076e-04",
+ "loss": 0.8724,
+ "slid_loss": 0.9119,
+ "step": 1442,
+ "time": 14.19
+ },
+ {
+ "epoch": 1.39,
+ "learning_rate": "1.9074e-04",
+ "loss": 0.8638,
+ "slid_loss": 0.9089,
+ "step": 1443,
+ "time": 13.05
+ },
+ {
+ "epoch": 1.39,
+ "learning_rate": "1.9072e-04",
+ "loss": 0.8757,
+ "slid_loss": 0.9085,
+ "step": 1444,
+ "time": 13.8
+ },
+ {
+ "epoch": 1.39,
+ "learning_rate": "1.9070e-04",
+ "loss": 0.8016,
+ "slid_loss": 0.908,
+ "step": 1445,
+ "time": 11.84
+ },
+ {
+ "epoch": 1.39,
+ "learning_rate": "1.9068e-04",
+ "loss": 0.8614,
+ "slid_loss": 0.907,
+ "step": 1446,
+ "time": 13.39
+ },
+ {
+ "epoch": 1.39,
+ "learning_rate": "1.9066e-04",
+ "loss": 0.8978,
+ "slid_loss": 0.9064,
+ "step": 1447,
+ "time": 13.38
+ },
+ {
+ "epoch": 1.39,
+ "learning_rate": "1.9064e-04",
+ "loss": 0.8227,
+ "slid_loss": 0.9053,
+ "step": 1448,
+ "time": 12.59
+ },
+ {
+ "epoch": 1.39,
+ "learning_rate": "1.9062e-04",
+ "loss": 0.9705,
+ "slid_loss": 0.9063,
+ "step": 1449,
+ "time": 13.6
+ },
+ {
+ "epoch": 1.39,
+ "learning_rate": "1.9060e-04",
+ "loss": 0.8924,
+ "slid_loss": 0.9064,
+ "step": 1450,
+ "time": 12.7
+ },
+ {
+ "epoch": 1.39,
+ "learning_rate": "1.9058e-04",
+ "loss": 0.8413,
+ "slid_loss": 0.9054,
+ "step": 1451,
+ "time": 13.16
+ },
+ {
+ "epoch": 1.39,
+ "learning_rate": "1.9056e-04",
+ "loss": 0.9253,
+ "slid_loss": 0.9053,
+ "step": 1452,
+ "time": 13.83
+ },
+ {
+ "epoch": 1.4,
+ "learning_rate": "1.9055e-04",
+ "loss": 0.7543,
+ "slid_loss": 0.9039,
+ "step": 1453,
+ "time": 12.87
+ },
+ {
+ "epoch": 1.4,
+ "learning_rate": "1.9053e-04",
+ "loss": 0.8943,
+ "slid_loss": 0.9034,
+ "step": 1454,
+ "time": 13.03
+ },
+ {
+ "epoch": 1.4,
+ "learning_rate": "1.9051e-04",
+ "loss": 0.9155,
+ "slid_loss": 0.9029,
+ "step": 1455,
+ "time": 12.89
+ },
+ {
+ "epoch": 1.4,
+ "learning_rate": "1.9049e-04",
+ "loss": 0.954,
+ "slid_loss": 0.902,
+ "step": 1456,
+ "time": 13.39
+ },
+ {
+ "epoch": 1.4,
+ "learning_rate": "1.9047e-04",
+ "loss": 0.8764,
+ "slid_loss": 0.9021,
+ "step": 1457,
+ "time": 12.04
+ },
+ {
+ "epoch": 1.4,
+ "learning_rate": "1.9045e-04",
+ "loss": 0.9024,
+ "slid_loss": 0.9023,
+ "step": 1458,
+ "time": 13.6
+ },
+ {
+ "epoch": 1.4,
+ "learning_rate": "1.9043e-04",
+ "loss": 0.9397,
+ "slid_loss": 0.9022,
+ "step": 1459,
+ "time": 13.3
+ },
+ {
+ "epoch": 1.4,
+ "learning_rate": "1.9041e-04",
+ "loss": 0.807,
+ "slid_loss": 0.9017,
+ "step": 1460,
+ "time": 12.98
+ },
+ {
+ "epoch": 1.4,
+ "learning_rate": "1.9039e-04",
+ "loss": 0.8565,
+ "slid_loss": 0.9012,
+ "step": 1461,
+ "time": 13.68
+ },
+ {
+ "epoch": 1.4,
+ "learning_rate": "1.9037e-04",
+ "loss": 0.9582,
+ "slid_loss": 0.9013,
+ "step": 1462,
+ "time": 13.57
+ },
+ {
+ "epoch": 1.41,
+ "learning_rate": "1.9035e-04",
+ "loss": 0.8449,
+ "slid_loss": 0.9,
+ "step": 1463,
+ "time": 13.23
+ },
+ {
+ "epoch": 1.41,
+ "learning_rate": "1.9033e-04",
+ "loss": 0.8039,
+ "slid_loss": 0.8984,
+ "step": 1464,
+ "time": 13.14
+ },
+ {
+ "epoch": 1.41,
+ "learning_rate": "1.9031e-04",
+ "loss": 0.8375,
+ "slid_loss": 0.8971,
+ "step": 1465,
+ "time": 11.24
+ },
+ {
+ "epoch": 1.41,
+ "learning_rate": "1.9029e-04",
+ "loss": 0.8942,
+ "slid_loss": 0.8969,
+ "step": 1466,
+ "time": 13.91
+ },
+ {
+ "epoch": 1.41,
+ "learning_rate": "1.9027e-04",
+ "loss": 0.8093,
+ "slid_loss": 0.8961,
+ "step": 1467,
+ "time": 12.82
+ },
+ {
+ "epoch": 1.41,
+ "learning_rate": "1.9025e-04",
+ "loss": 0.9197,
+ "slid_loss": 0.8972,
+ "step": 1468,
+ "time": 11.49
+ },
+ {
+ "epoch": 1.41,
+ "learning_rate": "1.9023e-04",
+ "loss": 0.8309,
+ "slid_loss": 0.8965,
+ "step": 1469,
+ "time": 13.62
+ },
+ {
+ "epoch": 1.41,
+ "learning_rate": "1.9021e-04",
+ "loss": 0.7911,
+ "slid_loss": 0.8956,
+ "step": 1470,
+ "time": 11.29
+ },
+ {
+ "epoch": 1.41,
+ "learning_rate": "1.9019e-04",
+ "loss": 0.9153,
+ "slid_loss": 0.8958,
+ "step": 1471,
+ "time": 13.95
+ },
+ {
+ "epoch": 1.41,
+ "learning_rate": "1.9017e-04",
+ "loss": 0.8722,
+ "slid_loss": 0.8956,
+ "step": 1472,
+ "time": 14.05
+ },
+ {
+ "epoch": 1.41,
+ "learning_rate": "1.9015e-04",
+ "loss": 0.8862,
+ "slid_loss": 0.8955,
+ "step": 1473,
+ "time": 11.82
+ },
+ {
+ "epoch": 1.42,
+ "learning_rate": "1.9013e-04",
+ "loss": 0.8181,
+ "slid_loss": 0.8964,
+ "step": 1474,
+ "time": 13.67
+ },
+ {
+ "epoch": 1.42,
+ "learning_rate": "1.9011e-04",
+ "loss": 0.8973,
+ "slid_loss": 0.8958,
+ "step": 1475,
+ "time": 13.24
+ },
+ {
+ "epoch": 1.42,
+ "learning_rate": "1.9009e-04",
+ "loss": 0.9214,
+ "slid_loss": 0.8953,
+ "step": 1476,
+ "time": 13.36
+ },
+ {
+ "epoch": 1.42,
+ "learning_rate": "1.9007e-04",
+ "loss": 0.9046,
+ "slid_loss": 0.8953,
+ "step": 1477,
+ "time": 11.75
+ },
+ {
+ "epoch": 1.42,
+ "learning_rate": "1.9005e-04",
+ "loss": 0.839,
+ "slid_loss": 0.8944,
+ "step": 1478,
+ "time": 13.3
+ },
+ {
+ "epoch": 1.42,
+ "learning_rate": "1.9003e-04",
+ "loss": 0.8992,
+ "slid_loss": 0.8948,
+ "step": 1479,
+ "time": 13.71
+ },
+ {
+ "epoch": 1.42,
+ "learning_rate": "1.9001e-04",
+ "loss": 0.9709,
+ "slid_loss": 0.8949,
+ "step": 1480,
+ "time": 13.57
+ },
+ {
+ "epoch": 1.42,
+ "learning_rate": "1.8999e-04",
+ "loss": 0.9609,
+ "slid_loss": 0.8963,
+ "step": 1481,
+ "time": 11.21
+ },
+ {
+ "epoch": 1.42,
+ "learning_rate": "1.8997e-04",
+ "loss": 0.834,
+ "slid_loss": 0.8961,
+ "step": 1482,
+ "time": 13.02
+ },
+ {
+ "epoch": 1.42,
+ "learning_rate": "1.8995e-04",
+ "loss": 0.8869,
+ "slid_loss": 0.8946,
+ "step": 1483,
+ "time": 12.7
+ },
+ {
+ "epoch": 1.43,
+ "learning_rate": "1.8993e-04",
+ "loss": 0.8648,
+ "slid_loss": 0.8944,
+ "step": 1484,
+ "time": 12.16
+ },
+ {
+ "epoch": 1.43,
+ "learning_rate": "1.8991e-04",
+ "loss": 0.911,
+ "slid_loss": 0.8939,
+ "step": 1485,
+ "time": 13.46
+ },
+ {
+ "epoch": 1.43,
+ "learning_rate": "1.8989e-04",
+ "loss": 0.9668,
+ "slid_loss": 0.8944,
+ "step": 1486,
+ "time": 11.48
+ },
+ {
+ "epoch": 1.43,
+ "learning_rate": "1.8987e-04",
+ "loss": 0.8711,
+ "slid_loss": 0.8935,
+ "step": 1487,
+ "time": 13.72
+ },
+ {
+ "epoch": 1.43,
+ "learning_rate": "1.8985e-04",
+ "loss": 0.9204,
+ "slid_loss": 0.8932,
+ "step": 1488,
+ "time": 13.33
+ },
+ {
+ "epoch": 1.43,
+ "learning_rate": "1.8983e-04",
+ "loss": 0.9283,
+ "slid_loss": 0.8929,
+ "step": 1489,
+ "time": 13.24
+ },
+ {
+ "epoch": 1.43,
+ "learning_rate": "1.8981e-04",
+ "loss": 0.8535,
+ "slid_loss": 0.8924,
+ "step": 1490,
+ "time": 13.82
+ },
+ {
+ "epoch": 1.43,
+ "learning_rate": "1.8979e-04",
+ "loss": 0.8063,
+ "slid_loss": 0.8918,
+ "step": 1491,
+ "time": 12.81
+ },
+ {
+ "epoch": 1.43,
+ "learning_rate": "1.8977e-04",
+ "loss": 0.8354,
+ "slid_loss": 0.8913,
+ "step": 1492,
+ "time": 13.3
+ },
+ {
+ "epoch": 1.43,
+ "learning_rate": "1.8975e-04",
+ "loss": 0.8483,
+ "slid_loss": 0.8906,
+ "step": 1493,
+ "time": 13.87
+ },
+ {
+ "epoch": 1.44,
+ "learning_rate": "1.8973e-04",
+ "loss": 0.9289,
+ "slid_loss": 0.8916,
+ "step": 1494,
+ "time": 13.29
+ },
+ {
+ "epoch": 1.44,
+ "learning_rate": "1.8971e-04",
+ "loss": 0.8886,
+ "slid_loss": 0.8913,
+ "step": 1495,
+ "time": 14.17
+ },
+ {
+ "epoch": 1.44,
+ "learning_rate": "1.8968e-04",
+ "loss": 0.9355,
+ "slid_loss": 0.8907,
+ "step": 1496,
+ "time": 14.97
+ },
+ {
+ "epoch": 1.44,
+ "learning_rate": "1.8966e-04",
+ "loss": 0.8532,
+ "slid_loss": 0.8903,
+ "step": 1497,
+ "time": 11.54
+ },
+ {
+ "epoch": 1.44,
+ "learning_rate": "1.8964e-04",
+ "loss": 0.8421,
+ "slid_loss": 0.8899,
+ "step": 1498,
+ "time": 13.54
+ },
+ {
+ "epoch": 1.44,
+ "learning_rate": "1.8962e-04",
+ "loss": 0.8613,
+ "slid_loss": 0.8881,
+ "step": 1499,
+ "time": 14.04
+ },
+ {
+ "epoch": 1.44,
+ "learning_rate": "1.8960e-04",
+ "loss": 0.7498,
+ "slid_loss": 0.8871,
+ "step": 1500,
+ "time": 11.3
+ },
+ {
+ "epoch": 1.44,
+ "learning_rate": "1.8958e-04",
+ "loss": 0.8904,
+ "slid_loss": 0.8868,
+ "step": 1501,
+ "time": 12.85
+ },
+ {
+ "epoch": 1.44,
+ "learning_rate": "1.8956e-04",
+ "loss": 0.885,
+ "slid_loss": 0.8861,
+ "step": 1502,
+ "time": 13.59
+ },
+ {
+ "epoch": 1.44,
+ "learning_rate": "1.8954e-04",
+ "loss": 0.7351,
+ "slid_loss": 0.8848,
+ "step": 1503,
+ "time": 12.56
+ },
+ {
+ "epoch": 1.44,
+ "learning_rate": "1.8952e-04",
+ "loss": 0.8382,
+ "slid_loss": 0.8849,
+ "step": 1504,
+ "time": 13.68
+ },
+ {
+ "epoch": 1.45,
+ "learning_rate": "1.8950e-04",
+ "loss": 0.9013,
+ "slid_loss": 0.8851,
+ "step": 1505,
+ "time": 13.07
+ },
+ {
+ "epoch": 1.45,
+ "learning_rate": "1.8948e-04",
+ "loss": 0.8669,
+ "slid_loss": 0.8853,
+ "step": 1506,
+ "time": 14.11
+ },
+ {
+ "epoch": 1.45,
+ "learning_rate": "1.8946e-04",
+ "loss": 0.9332,
+ "slid_loss": 0.8858,
+ "step": 1507,
+ "time": 14.78
+ },
+ {
+ "epoch": 1.45,
+ "learning_rate": "1.8944e-04",
+ "loss": 0.8608,
+ "slid_loss": 0.8849,
+ "step": 1508,
+ "time": 12.94
+ },
+ {
+ "epoch": 1.45,
+ "learning_rate": "1.8942e-04",
+ "loss": 0.9714,
+ "slid_loss": 0.8857,
+ "step": 1509,
+ "time": 11.57
+ },
+ {
+ "epoch": 1.45,
+ "learning_rate": "1.8940e-04",
+ "loss": 0.9011,
+ "slid_loss": 0.8854,
+ "step": 1510,
+ "time": 11.3
+ },
+ {
+ "epoch": 1.45,
+ "learning_rate": "1.8938e-04",
+ "loss": 0.855,
+ "slid_loss": 0.8847,
+ "step": 1511,
+ "time": 13.36
+ },
+ {
+ "epoch": 1.45,
+ "learning_rate": "1.8936e-04",
+ "loss": 0.9487,
+ "slid_loss": 0.8859,
+ "step": 1512,
+ "time": 13.48
+ },
+ {
+ "epoch": 1.45,
+ "learning_rate": "1.8934e-04",
+ "loss": 0.8822,
+ "slid_loss": 0.8843,
+ "step": 1513,
+ "time": 14.25
+ },
+ {
+ "epoch": 1.45,
+ "learning_rate": "1.8931e-04",
+ "loss": 0.7983,
+ "slid_loss": 0.8824,
+ "step": 1514,
+ "time": 13.96
+ },
+ {
+ "epoch": 1.46,
+ "learning_rate": "1.8929e-04",
+ "loss": 1.0528,
+ "slid_loss": 0.8837,
+ "step": 1515,
+ "time": 11.67
+ },
+ {
+ "epoch": 1.46,
+ "learning_rate": "1.8927e-04",
+ "loss": 0.8597,
+ "slid_loss": 0.8828,
+ "step": 1516,
+ "time": 14.21
+ },
+ {
+ "epoch": 1.46,
+ "learning_rate": "1.8925e-04",
+ "loss": 0.9117,
+ "slid_loss": 0.8838,
+ "step": 1517,
+ "time": 12.88
+ },
+ {
+ "epoch": 1.46,
+ "learning_rate": "1.8923e-04",
+ "loss": 0.914,
+ "slid_loss": 0.8845,
+ "step": 1518,
+ "time": 13.08
+ },
+ {
+ "epoch": 1.46,
+ "learning_rate": "1.8921e-04",
+ "loss": 0.9285,
+ "slid_loss": 0.8839,
+ "step": 1519,
+ "time": 14.04
+ },
+ {
+ "epoch": 1.46,
+ "learning_rate": "1.8919e-04",
+ "loss": 0.8524,
+ "slid_loss": 0.8834,
+ "step": 1520,
+ "time": 13.89
+ },
+ {
+ "epoch": 1.46,
+ "learning_rate": "1.8917e-04",
+ "loss": 0.8491,
+ "slid_loss": 0.8826,
+ "step": 1521,
+ "time": 13.37
+ },
+ {
+ "epoch": 1.46,
+ "learning_rate": "1.8915e-04",
+ "loss": 0.8738,
+ "slid_loss": 0.8823,
+ "step": 1522,
+ "time": 12.14
+ },
+ {
+ "epoch": 1.46,
+ "learning_rate": "1.8913e-04",
+ "loss": 0.7718,
+ "slid_loss": 0.882,
+ "step": 1523,
+ "time": 12.83
+ },
+ {
+ "epoch": 1.46,
+ "learning_rate": "1.8911e-04",
+ "loss": 0.8818,
+ "slid_loss": 0.8811,
+ "step": 1524,
+ "time": 13.23
+ },
+ {
+ "epoch": 1.46,
+ "learning_rate": "1.8909e-04",
+ "loss": 0.9648,
+ "slid_loss": 0.8807,
+ "step": 1525,
+ "time": 13.87
+ },
+ {
+ "epoch": 1.47,
+ "learning_rate": "1.8906e-04",
+ "loss": 0.9511,
+ "slid_loss": 0.8816,
+ "step": 1526,
+ "time": 13.31
+ },
+ {
+ "epoch": 1.47,
+ "learning_rate": "1.8904e-04",
+ "loss": 0.9515,
+ "slid_loss": 0.8816,
+ "step": 1527,
+ "time": 13.25
+ },
+ {
+ "epoch": 1.47,
+ "learning_rate": "1.8902e-04",
+ "loss": 0.8144,
+ "slid_loss": 0.8805,
+ "step": 1528,
+ "time": 11.91
+ },
+ {
+ "epoch": 1.47,
+ "learning_rate": "1.8900e-04",
+ "loss": 0.8155,
+ "slid_loss": 0.8792,
+ "step": 1529,
+ "time": 13.78
+ },
+ {
+ "epoch": 1.47,
+ "learning_rate": "1.8898e-04",
+ "loss": 0.9527,
+ "slid_loss": 0.88,
+ "step": 1530,
+ "time": 12.25
+ },
+ {
+ "epoch": 1.47,
+ "learning_rate": "1.8896e-04",
+ "loss": 0.8869,
+ "slid_loss": 0.8808,
+ "step": 1531,
+ "time": 12.95
+ },
+ {
+ "epoch": 1.47,
+ "learning_rate": "1.8894e-04",
+ "loss": 0.9171,
+ "slid_loss": 0.8818,
+ "step": 1532,
+ "time": 13.53
+ },
+ {
+ "epoch": 1.47,
+ "learning_rate": "1.8892e-04",
+ "loss": 0.8461,
+ "slid_loss": 0.8799,
+ "step": 1533,
+ "time": 13.2
+ },
+ {
+ "epoch": 1.47,
+ "learning_rate": "1.8890e-04",
+ "loss": 0.9913,
+ "slid_loss": 0.8806,
+ "step": 1534,
+ "time": 13.25
+ },
+ {
+ "epoch": 1.47,
+ "learning_rate": "1.8888e-04",
+ "loss": 0.8733,
+ "slid_loss": 0.8799,
+ "step": 1535,
+ "time": 13.92
+ },
+ {
+ "epoch": 1.48,
+ "learning_rate": "1.8885e-04",
+ "loss": 0.8552,
+ "slid_loss": 0.8797,
+ "step": 1536,
+ "time": 13.56
+ },
+ {
+ "epoch": 1.48,
+ "learning_rate": "1.8883e-04",
+ "loss": 0.9202,
+ "slid_loss": 0.88,
+ "step": 1537,
+ "time": 12.99
+ },
+ {
+ "epoch": 1.48,
+ "learning_rate": "1.8881e-04",
+ "loss": 0.912,
+ "slid_loss": 0.8807,
+ "step": 1538,
+ "time": 12.22
+ },
+ {
+ "epoch": 1.48,
+ "learning_rate": "1.8879e-04",
+ "loss": 0.8168,
+ "slid_loss": 0.8801,
+ "step": 1539,
+ "time": 12.98
+ },
+ {
+ "epoch": 1.48,
+ "learning_rate": "1.8877e-04",
+ "loss": 1.0205,
+ "slid_loss": 0.8817,
+ "step": 1540,
+ "time": 13.17
+ },
+ {
+ "epoch": 1.48,
+ "learning_rate": "1.8875e-04",
+ "loss": 1.0057,
+ "slid_loss": 0.8834,
+ "step": 1541,
+ "time": 12.94
+ },
+ {
+ "epoch": 1.48,
+ "learning_rate": "1.8873e-04",
+ "loss": 0.8408,
+ "slid_loss": 0.8831,
+ "step": 1542,
+ "time": 14.32
+ },
+ {
+ "epoch": 1.48,
+ "learning_rate": "1.8871e-04",
+ "loss": 0.8673,
+ "slid_loss": 0.8832,
+ "step": 1543,
+ "time": 11.5
+ },
+ {
+ "epoch": 1.48,
+ "learning_rate": "1.8869e-04",
+ "loss": 0.861,
+ "slid_loss": 0.883,
+ "step": 1544,
+ "time": 13.27
+ },
+ {
+ "epoch": 1.48,
+ "learning_rate": "1.8866e-04",
+ "loss": 0.9508,
+ "slid_loss": 0.8845,
+ "step": 1545,
+ "time": 12.24
+ },
+ {
+ "epoch": 1.49,
+ "learning_rate": "1.8864e-04",
+ "loss": 0.8187,
+ "slid_loss": 0.8841,
+ "step": 1546,
+ "time": 12.26
+ },
+ {
+ "epoch": 1.49,
+ "learning_rate": "1.8862e-04",
+ "loss": 0.8771,
+ "slid_loss": 0.8839,
+ "step": 1547,
+ "time": 13.41
+ },
+ {
+ "epoch": 1.49,
+ "learning_rate": "1.8860e-04",
+ "loss": 0.8355,
+ "slid_loss": 0.884,
+ "step": 1548,
+ "time": 13.2
+ },
+ {
+ "epoch": 1.49,
+ "learning_rate": "1.8858e-04",
+ "loss": 0.8385,
+ "slid_loss": 0.8827,
+ "step": 1549,
+ "time": 14.0
+ },
+ {
+ "epoch": 1.49,
+ "learning_rate": "1.8856e-04",
+ "loss": 0.8897,
+ "slid_loss": 0.8827,
+ "step": 1550,
+ "time": 14.35
+ },
+ {
+ "epoch": 1.49,
+ "learning_rate": "1.8854e-04",
+ "loss": 0.8735,
+ "slid_loss": 0.883,
+ "step": 1551,
+ "time": 12.77
+ },
+ {
+ "epoch": 1.49,
+ "learning_rate": "1.8851e-04",
+ "loss": 0.8096,
+ "slid_loss": 0.8818,
+ "step": 1552,
+ "time": 12.4
+ },
+ {
+ "epoch": 1.49,
+ "learning_rate": "1.8849e-04",
+ "loss": 0.8893,
+ "slid_loss": 0.8832,
+ "step": 1553,
+ "time": 14.18
+ },
+ {
+ "epoch": 1.49,
+ "learning_rate": "1.8847e-04",
+ "loss": 0.8604,
+ "slid_loss": 0.8828,
+ "step": 1554,
+ "time": 13.67
+ },
+ {
+ "epoch": 1.49,
+ "learning_rate": "1.8845e-04",
+ "loss": 0.8769,
+ "slid_loss": 0.8824,
+ "step": 1555,
+ "time": 13.25
+ },
+ {
+ "epoch": 1.49,
+ "learning_rate": "1.8843e-04",
+ "loss": 0.9607,
+ "slid_loss": 0.8825,
+ "step": 1556,
+ "time": 13.65
+ },
+ {
+ "epoch": 1.5,
+ "learning_rate": "1.8841e-04",
+ "loss": 0.9653,
+ "slid_loss": 0.8834,
+ "step": 1557,
+ "time": 13.53
+ },
+ {
+ "epoch": 1.5,
+ "learning_rate": "1.8839e-04",
+ "loss": 0.7939,
+ "slid_loss": 0.8823,
+ "step": 1558,
+ "time": 12.87
+ },
+ {
+ "epoch": 1.5,
+ "learning_rate": "1.8836e-04",
+ "loss": 0.7867,
+ "slid_loss": 0.8808,
+ "step": 1559,
+ "time": 13.52
+ },
+ {
+ "epoch": 1.5,
+ "learning_rate": "1.8834e-04",
+ "loss": 0.8356,
+ "slid_loss": 0.8811,
+ "step": 1560,
+ "time": 14.4
+ },
+ {
+ "epoch": 1.5,
+ "learning_rate": "1.8832e-04",
+ "loss": 0.8589,
+ "slid_loss": 0.8811,
+ "step": 1561,
+ "time": 13.99
+ },
+ {
+ "epoch": 1.5,
+ "learning_rate": "1.8830e-04",
+ "loss": 0.8716,
+ "slid_loss": 0.8802,
+ "step": 1562,
+ "time": 13.82
+ },
+ {
+ "epoch": 1.5,
+ "learning_rate": "1.8828e-04",
+ "loss": 0.9385,
+ "slid_loss": 0.8812,
+ "step": 1563,
+ "time": 12.86
+ },
+ {
+ "epoch": 1.5,
+ "learning_rate": "1.8826e-04",
+ "loss": 0.8067,
+ "slid_loss": 0.8812,
+ "step": 1564,
+ "time": 13.06
+ },
+ {
+ "epoch": 1.5,
+ "learning_rate": "1.8824e-04",
+ "loss": 0.8867,
+ "slid_loss": 0.8817,
+ "step": 1565,
+ "time": 14.02
+ },
+ {
+ "epoch": 1.5,
+ "learning_rate": "1.8821e-04",
+ "loss": 0.9579,
+ "slid_loss": 0.8823,
+ "step": 1566,
+ "time": 11.32
+ },
+ {
+ "epoch": 1.51,
+ "learning_rate": "1.8819e-04",
+ "loss": 0.8706,
+ "slid_loss": 0.8829,
+ "step": 1567,
+ "time": 13.46
+ },
+ {
+ "epoch": 1.51,
+ "learning_rate": "1.8817e-04",
+ "loss": 0.924,
+ "slid_loss": 0.883,
+ "step": 1568,
+ "time": 13.54
+ },
+ {
+ "epoch": 1.51,
+ "learning_rate": "1.8815e-04",
+ "loss": 0.9206,
+ "slid_loss": 0.8839,
+ "step": 1569,
+ "time": 11.96
+ },
+ {
+ "epoch": 1.51,
+ "learning_rate": "1.8813e-04",
+ "loss": 0.8313,
+ "slid_loss": 0.8843,
+ "step": 1570,
+ "time": 13.49
+ },
+ {
+ "epoch": 1.51,
+ "learning_rate": "1.8811e-04",
+ "loss": 0.9262,
+ "slid_loss": 0.8844,
+ "step": 1571,
+ "time": 13.53
+ },
+ {
+ "epoch": 1.51,
+ "learning_rate": "1.8808e-04",
+ "loss": 0.8273,
+ "slid_loss": 0.8839,
+ "step": 1572,
+ "time": 14.18
+ },
+ {
+ "epoch": 1.51,
+ "learning_rate": "1.8806e-04",
+ "loss": 0.8284,
+ "slid_loss": 0.8834,
+ "step": 1573,
+ "time": 13.72
+ },
+ {
+ "epoch": 1.51,
+ "learning_rate": "1.8804e-04",
+ "loss": 0.8375,
+ "slid_loss": 0.8835,
+ "step": 1574,
+ "time": 13.52
+ },
+ {
+ "epoch": 1.51,
+ "learning_rate": "1.8802e-04",
+ "loss": 0.9035,
+ "slid_loss": 0.8836,
+ "step": 1575,
+ "time": 13.32
+ },
+ {
+ "epoch": 1.51,
+ "learning_rate": "1.8800e-04",
+ "loss": 0.9119,
+ "slid_loss": 0.8835,
+ "step": 1576,
+ "time": 13.91
+ },
+ {
+ "epoch": 1.51,
+ "learning_rate": "1.8797e-04",
+ "loss": 0.8213,
+ "slid_loss": 0.8827,
+ "step": 1577,
+ "time": 13.39
+ },
+ {
+ "epoch": 1.52,
+ "learning_rate": "1.8795e-04",
+ "loss": 0.9077,
+ "slid_loss": 0.8834,
+ "step": 1578,
+ "time": 12.9
+ },
+ {
+ "epoch": 1.52,
+ "learning_rate": "1.8793e-04",
+ "loss": 0.8485,
+ "slid_loss": 0.8829,
+ "step": 1579,
+ "time": 13.3
+ },
+ {
+ "epoch": 1.52,
+ "learning_rate": "1.8791e-04",
+ "loss": 0.8139,
+ "slid_loss": 0.8813,
+ "step": 1580,
+ "time": 12.34
+ },
+ {
+ "epoch": 1.52,
+ "learning_rate": "1.8789e-04",
+ "loss": 0.922,
+ "slid_loss": 0.8809,
+ "step": 1581,
+ "time": 14.02
+ },
+ {
+ "epoch": 1.52,
+ "learning_rate": "1.8787e-04",
+ "loss": 0.9958,
+ "slid_loss": 0.8825,
+ "step": 1582,
+ "time": 13.22
+ },
+ {
+ "epoch": 1.52,
+ "learning_rate": "1.8784e-04",
+ "loss": 0.938,
+ "slid_loss": 0.883,
+ "step": 1583,
+ "time": 13.64
+ },
+ {
+ "epoch": 1.52,
+ "learning_rate": "1.8782e-04",
+ "loss": 0.8719,
+ "slid_loss": 0.8831,
+ "step": 1584,
+ "time": 13.45
+ },
+ {
+ "epoch": 1.52,
+ "learning_rate": "1.8780e-04",
+ "loss": 0.803,
+ "slid_loss": 0.882,
+ "step": 1585,
+ "time": 14.42
+ },
+ {
+ "epoch": 1.52,
+ "learning_rate": "1.8778e-04",
+ "loss": 0.8928,
+ "slid_loss": 0.8813,
+ "step": 1586,
+ "time": 13.43
+ },
+ {
+ "epoch": 1.52,
+ "learning_rate": "1.8776e-04",
+ "loss": 0.808,
+ "slid_loss": 0.8807,
+ "step": 1587,
+ "time": 11.51
+ },
+ {
+ "epoch": 1.53,
+ "learning_rate": "1.8773e-04",
+ "loss": 0.9355,
+ "slid_loss": 0.8808,
+ "step": 1588,
+ "time": 11.51
+ },
+ {
+ "epoch": 1.53,
+ "learning_rate": "1.8771e-04",
+ "loss": 0.9875,
+ "slid_loss": 0.8814,
+ "step": 1589,
+ "time": 13.1
+ },
+ {
+ "epoch": 1.53,
+ "learning_rate": "1.8769e-04",
+ "loss": 0.8737,
+ "slid_loss": 0.8816,
+ "step": 1590,
+ "time": 11.82
+ },
+ {
+ "epoch": 1.53,
+ "learning_rate": "1.8767e-04",
+ "loss": 0.9711,
+ "slid_loss": 0.8832,
+ "step": 1591,
+ "time": 13.29
+ },
+ {
+ "epoch": 1.53,
+ "learning_rate": "1.8765e-04",
+ "loss": 0.8412,
+ "slid_loss": 0.8833,
+ "step": 1592,
+ "time": 11.37
+ },
+ {
+ "epoch": 1.53,
+ "learning_rate": "1.8762e-04",
+ "loss": 0.8723,
+ "slid_loss": 0.8835,
+ "step": 1593,
+ "time": 13.39
+ },
+ {
+ "epoch": 1.53,
+ "learning_rate": "1.8760e-04",
+ "loss": 0.8755,
+ "slid_loss": 0.883,
+ "step": 1594,
+ "time": 13.27
+ },
+ {
+ "epoch": 1.53,
+ "learning_rate": "1.8758e-04",
+ "loss": 0.9335,
+ "slid_loss": 0.8835,
+ "step": 1595,
+ "time": 13.18
+ },
+ {
+ "epoch": 1.53,
+ "learning_rate": "1.8756e-04",
+ "loss": 0.7671,
+ "slid_loss": 0.8818,
+ "step": 1596,
+ "time": 13.37
+ },
+ {
+ "epoch": 1.53,
+ "learning_rate": "1.8753e-04",
+ "loss": 0.8048,
+ "slid_loss": 0.8813,
+ "step": 1597,
+ "time": 13.66
+ },
+ {
+ "epoch": 1.54,
+ "learning_rate": "1.8751e-04",
+ "loss": 0.8984,
+ "slid_loss": 0.8819,
+ "step": 1598,
+ "time": 12.19
+ },
+ {
+ "epoch": 1.54,
+ "learning_rate": "1.8749e-04",
+ "loss": 0.8758,
+ "slid_loss": 0.882,
+ "step": 1599,
+ "time": 12.15
+ },
+ {
+ "epoch": 1.54,
+ "learning_rate": "1.8747e-04",
+ "loss": 0.9149,
+ "slid_loss": 0.8837,
+ "step": 1600,
+ "time": 11.71
+ },
+ {
+ "epoch": 1.54,
+ "learning_rate": "1.8745e-04",
+ "loss": 0.9414,
+ "slid_loss": 0.8842,
+ "step": 1601,
+ "time": 13.73
+ },
+ {
+ "epoch": 1.54,
+ "learning_rate": "1.8742e-04",
+ "loss": 0.8856,
+ "slid_loss": 0.8842,
+ "step": 1602,
+ "time": 13.4
+ },
+ {
+ "epoch": 1.54,
+ "learning_rate": "1.8740e-04",
+ "loss": 1.0178,
+ "slid_loss": 0.887,
+ "step": 1603,
+ "time": 13.68
+ },
+ {
+ "epoch": 1.54,
+ "learning_rate": "1.8738e-04",
+ "loss": 0.8888,
+ "slid_loss": 0.8875,
+ "step": 1604,
+ "time": 12.96
+ },
+ {
+ "epoch": 1.54,
+ "learning_rate": "1.8736e-04",
+ "loss": 0.9464,
+ "slid_loss": 0.888,
+ "step": 1605,
+ "time": 13.52
+ },
+ {
+ "epoch": 1.54,
+ "learning_rate": "1.8733e-04",
+ "loss": 0.8917,
+ "slid_loss": 0.8882,
+ "step": 1606,
+ "time": 11.27
+ },
+ {
+ "epoch": 1.54,
+ "learning_rate": "1.8731e-04",
+ "loss": 0.857,
+ "slid_loss": 0.8874,
+ "step": 1607,
+ "time": 12.94
+ },
+ {
+ "epoch": 1.54,
+ "learning_rate": "1.8729e-04",
+ "loss": 0.9577,
+ "slid_loss": 0.8884,
+ "step": 1608,
+ "time": 12.86
+ },
+ {
+ "epoch": 1.55,
+ "learning_rate": "1.8727e-04",
+ "loss": 0.897,
+ "slid_loss": 0.8877,
+ "step": 1609,
+ "time": 13.85
+ },
+ {
+ "epoch": 1.55,
+ "learning_rate": "1.8725e-04",
+ "loss": 0.8651,
+ "slid_loss": 0.8873,
+ "step": 1610,
+ "time": 13.66
+ },
+ {
+ "epoch": 1.55,
+ "learning_rate": "1.8722e-04",
+ "loss": 1.0093,
+ "slid_loss": 0.8888,
+ "step": 1611,
+ "time": 13.41
+ },
+ {
+ "epoch": 1.55,
+ "learning_rate": "1.8720e-04",
+ "loss": 0.8059,
+ "slid_loss": 0.8874,
+ "step": 1612,
+ "time": 13.64
+ },
+ {
+ "epoch": 1.55,
+ "learning_rate": "1.8718e-04",
+ "loss": 0.9999,
+ "slid_loss": 0.8886,
+ "step": 1613,
+ "time": 13.33
+ },
+ {
+ "epoch": 1.55,
+ "learning_rate": "1.8716e-04",
+ "loss": 0.8195,
+ "slid_loss": 0.8888,
+ "step": 1614,
+ "time": 11.33
+ },
+ {
+ "epoch": 1.55,
+ "learning_rate": "1.8713e-04",
+ "loss": 0.9,
+ "slid_loss": 0.8873,
+ "step": 1615,
+ "time": 11.54
+ },
+ {
+ "epoch": 1.55,
+ "learning_rate": "1.8711e-04",
+ "loss": 0.7715,
+ "slid_loss": 0.8864,
+ "step": 1616,
+ "time": 14.44
+ },
+ {
+ "epoch": 1.55,
+ "learning_rate": "1.8709e-04",
+ "loss": 0.9013,
+ "slid_loss": 0.8863,
+ "step": 1617,
+ "time": 13.32
+ },
+ {
+ "epoch": 1.55,
+ "learning_rate": "1.8707e-04",
+ "loss": 0.8062,
+ "slid_loss": 0.8852,
+ "step": 1618,
+ "time": 13.25
+ },
+ {
+ "epoch": 1.56,
+ "learning_rate": "1.8704e-04",
+ "loss": 0.8497,
+ "slid_loss": 0.8844,
+ "step": 1619,
+ "time": 13.54
+ },
+ {
+ "epoch": 1.56,
+ "learning_rate": "1.8702e-04",
+ "loss": 0.7631,
+ "slid_loss": 0.8835,
+ "step": 1620,
+ "time": 13.67
+ },
+ {
+ "epoch": 1.56,
+ "learning_rate": "1.8700e-04",
+ "loss": 0.8759,
+ "slid_loss": 0.8838,
+ "step": 1621,
+ "time": 13.6
+ },
+ {
+ "epoch": 1.56,
+ "learning_rate": "1.8698e-04",
+ "loss": 0.9415,
+ "slid_loss": 0.8845,
+ "step": 1622,
+ "time": 11.55
+ },
+ {
+ "epoch": 1.56,
+ "learning_rate": "1.8695e-04",
+ "loss": 0.7913,
+ "slid_loss": 0.8847,
+ "step": 1623,
+ "time": 11.95
+ },
+ {
+ "epoch": 1.56,
+ "learning_rate": "1.8693e-04",
+ "loss": 0.8145,
+ "slid_loss": 0.884,
+ "step": 1624,
+ "time": 11.43
+ },
+ {
+ "epoch": 1.56,
+ "learning_rate": "1.8691e-04",
+ "loss": 0.8533,
+ "slid_loss": 0.8829,
+ "step": 1625,
+ "time": 12.98
+ },
+ {
+ "epoch": 1.56,
+ "learning_rate": "1.8689e-04",
+ "loss": 0.848,
+ "slid_loss": 0.8819,
+ "step": 1626,
+ "time": 13.32
+ },
+ {
+ "epoch": 1.56,
+ "learning_rate": "1.8686e-04",
+ "loss": 0.8651,
+ "slid_loss": 0.881,
+ "step": 1627,
+ "time": 14.15
+ },
+ {
+ "epoch": 1.56,
+ "learning_rate": "1.8684e-04",
+ "loss": 0.8467,
+ "slid_loss": 0.8813,
+ "step": 1628,
+ "time": 13.18
+ },
+ {
+ "epoch": 1.56,
+ "learning_rate": "1.8682e-04",
+ "loss": 0.8886,
+ "slid_loss": 0.882,
+ "step": 1629,
+ "time": 12.92
+ },
+ {
+ "epoch": 1.57,
+ "learning_rate": "1.8679e-04",
+ "loss": 0.8826,
+ "slid_loss": 0.8813,
+ "step": 1630,
+ "time": 13.0
+ },
+ {
+ "epoch": 1.57,
+ "learning_rate": "1.8677e-04",
+ "loss": 0.9009,
+ "slid_loss": 0.8815,
+ "step": 1631,
+ "time": 12.98
+ },
+ {
+ "epoch": 1.57,
+ "learning_rate": "1.8675e-04",
+ "loss": 0.95,
+ "slid_loss": 0.8818,
+ "step": 1632,
+ "time": 11.58
+ },
+ {
+ "epoch": 1.57,
+ "learning_rate": "1.8673e-04",
+ "loss": 0.8361,
+ "slid_loss": 0.8817,
+ "step": 1633,
+ "time": 11.95
+ },
+ {
+ "epoch": 1.57,
+ "learning_rate": "1.8670e-04",
+ "loss": 0.9011,
+ "slid_loss": 0.8808,
+ "step": 1634,
+ "time": 12.88
+ },
+ {
+ "epoch": 1.57,
+ "learning_rate": "1.8668e-04",
+ "loss": 0.9022,
+ "slid_loss": 0.8811,
+ "step": 1635,
+ "time": 13.86
+ },
+ {
+ "epoch": 1.57,
+ "learning_rate": "1.8666e-04",
+ "loss": 0.8486,
+ "slid_loss": 0.881,
+ "step": 1636,
+ "time": 12.95
+ },
+ {
+ "epoch": 1.57,
+ "learning_rate": "1.8664e-04",
+ "loss": 0.8239,
+ "slid_loss": 0.8801,
+ "step": 1637,
+ "time": 12.12
+ },
+ {
+ "epoch": 1.57,
+ "learning_rate": "1.8661e-04",
+ "loss": 0.9273,
+ "slid_loss": 0.8802,
+ "step": 1638,
+ "time": 14.12
+ },
+ {
+ "epoch": 1.57,
+ "learning_rate": "1.8659e-04",
+ "loss": 0.9531,
+ "slid_loss": 0.8816,
+ "step": 1639,
+ "time": 13.53
+ },
+ {
+ "epoch": 1.58,
+ "learning_rate": "1.8657e-04",
+ "loss": 0.9465,
+ "slid_loss": 0.8808,
+ "step": 1640,
+ "time": 12.78
+ },
+ {
+ "epoch": 1.58,
+ "learning_rate": "1.8654e-04",
+ "loss": 0.8981,
+ "slid_loss": 0.8798,
+ "step": 1641,
+ "time": 13.58
+ },
+ {
+ "epoch": 1.58,
+ "learning_rate": "1.8652e-04",
+ "loss": 0.9054,
+ "slid_loss": 0.8804,
+ "step": 1642,
+ "time": 14.65
+ },
+ {
+ "epoch": 1.58,
+ "learning_rate": "1.8650e-04",
+ "loss": 0.8536,
+ "slid_loss": 0.8803,
+ "step": 1643,
+ "time": 13.11
+ },
+ {
+ "epoch": 1.58,
+ "learning_rate": "1.8648e-04",
+ "loss": 0.7797,
+ "slid_loss": 0.8795,
+ "step": 1644,
+ "time": 12.19
+ },
+ {
+ "epoch": 1.58,
+ "learning_rate": "1.8645e-04",
+ "loss": 1.003,
+ "slid_loss": 0.88,
+ "step": 1645,
+ "time": 12.85
+ },
+ {
+ "epoch": 1.58,
+ "learning_rate": "1.8643e-04",
+ "loss": 0.9013,
+ "slid_loss": 0.8808,
+ "step": 1646,
+ "time": 13.75
+ },
+ {
+ "epoch": 1.58,
+ "learning_rate": "1.8641e-04",
+ "loss": 0.8613,
+ "slid_loss": 0.8807,
+ "step": 1647,
+ "time": 11.27
+ },
+ {
+ "epoch": 1.58,
+ "learning_rate": "1.8638e-04",
+ "loss": 0.8331,
+ "slid_loss": 0.8806,
+ "step": 1648,
+ "time": 13.3
+ },
+ {
+ "epoch": 1.58,
+ "learning_rate": "1.8636e-04",
+ "loss": 0.8033,
+ "slid_loss": 0.8803,
+ "step": 1649,
+ "time": 12.91
+ },
+ {
+ "epoch": 1.59,
+ "learning_rate": "1.8634e-04",
+ "loss": 0.7966,
+ "slid_loss": 0.8794,
+ "step": 1650,
+ "time": 11.78
+ },
+ {
+ "epoch": 1.59,
+ "learning_rate": "1.8631e-04",
+ "loss": 0.7707,
+ "slid_loss": 0.8783,
+ "step": 1651,
+ "time": 12.9
+ },
+ {
+ "epoch": 1.59,
+ "learning_rate": "1.8629e-04",
+ "loss": 0.8721,
+ "slid_loss": 0.8789,
+ "step": 1652,
+ "time": 13.71
+ },
+ {
+ "epoch": 1.59,
+ "learning_rate": "1.8627e-04",
+ "loss": 0.7965,
+ "slid_loss": 0.878,
+ "step": 1653,
+ "time": 13.15
+ },
+ {
+ "epoch": 1.59,
+ "learning_rate": "1.8624e-04",
+ "loss": 0.9431,
+ "slid_loss": 0.8788,
+ "step": 1654,
+ "time": 11.51
+ },
+ {
+ "epoch": 1.59,
+ "learning_rate": "1.8622e-04",
+ "loss": 0.7466,
+ "slid_loss": 0.8775,
+ "step": 1655,
+ "time": 13.2
+ },
+ {
+ "epoch": 1.59,
+ "learning_rate": "1.8620e-04",
+ "loss": 0.9377,
+ "slid_loss": 0.8773,
+ "step": 1656,
+ "time": 13.47
+ },
+ {
+ "epoch": 1.59,
+ "learning_rate": "1.8618e-04",
+ "loss": 0.8826,
+ "slid_loss": 0.8765,
+ "step": 1657,
+ "time": 13.63
+ },
+ {
+ "epoch": 1.59,
+ "learning_rate": "1.8615e-04",
+ "loss": 0.9438,
+ "slid_loss": 0.878,
+ "step": 1658,
+ "time": 13.21
+ },
+ {
+ "epoch": 1.59,
+ "learning_rate": "1.8613e-04",
+ "loss": 0.8679,
+ "slid_loss": 0.8788,
+ "step": 1659,
+ "time": 13.78
+ },
+ {
+ "epoch": 1.59,
+ "learning_rate": "1.8611e-04",
+ "loss": 0.8157,
+ "slid_loss": 0.8786,
+ "step": 1660,
+ "time": 13.09
+ },
+ {
+ "epoch": 1.6,
+ "learning_rate": "1.8608e-04",
+ "loss": 0.8443,
+ "slid_loss": 0.8785,
+ "step": 1661,
+ "time": 12.92
+ },
+ {
+ "epoch": 1.6,
+ "learning_rate": "1.8606e-04",
+ "loss": 0.8592,
+ "slid_loss": 0.8783,
+ "step": 1662,
+ "time": 13.51
+ },
+ {
+ "epoch": 1.6,
+ "learning_rate": "1.8604e-04",
+ "loss": 0.8848,
+ "slid_loss": 0.8778,
+ "step": 1663,
+ "time": 13.81
+ },
+ {
+ "epoch": 1.6,
+ "learning_rate": "1.8601e-04",
+ "loss": 0.8622,
+ "slid_loss": 0.8784,
+ "step": 1664,
+ "time": 13.46
+ },
+ {
+ "epoch": 1.6,
+ "learning_rate": "1.8599e-04",
+ "loss": 0.9053,
+ "slid_loss": 0.8785,
+ "step": 1665,
+ "time": 14.04
+ },
+ {
+ "epoch": 1.6,
+ "learning_rate": "1.8597e-04",
+ "loss": 0.7656,
+ "slid_loss": 0.8766,
+ "step": 1666,
+ "time": 11.16
+ },
+ {
+ "epoch": 1.6,
+ "learning_rate": "1.8594e-04",
+ "loss": 0.9854,
+ "slid_loss": 0.8778,
+ "step": 1667,
+ "time": 13.35
+ },
+ {
+ "epoch": 1.6,
+ "learning_rate": "1.8592e-04",
+ "loss": 0.8367,
+ "slid_loss": 0.8769,
+ "step": 1668,
+ "time": 11.99
+ },
+ {
+ "epoch": 1.6,
+ "learning_rate": "1.8590e-04",
+ "loss": 0.8438,
+ "slid_loss": 0.8761,
+ "step": 1669,
+ "time": 12.97
+ },
+ {
+ "epoch": 1.6,
+ "learning_rate": "1.8587e-04",
+ "loss": 1.0256,
+ "slid_loss": 0.8781,
+ "step": 1670,
+ "time": 12.88
+ },
+ {
+ "epoch": 1.61,
+ "learning_rate": "1.8585e-04",
+ "loss": 0.956,
+ "slid_loss": 0.8784,
+ "step": 1671,
+ "time": 12.96
+ },
+ {
+ "epoch": 1.61,
+ "learning_rate": "1.8583e-04",
+ "loss": 0.8589,
+ "slid_loss": 0.8787,
+ "step": 1672,
+ "time": 11.62
+ },
+ {
+ "epoch": 1.61,
+ "learning_rate": "1.8580e-04",
+ "loss": 0.8256,
+ "slid_loss": 0.8786,
+ "step": 1673,
+ "time": 10.74
+ },
+ {
+ "epoch": 1.61,
+ "learning_rate": "1.8578e-04",
+ "loss": 0.7996,
+ "slid_loss": 0.8783,
+ "step": 1674,
+ "time": 13.82
+ },
+ {
+ "epoch": 1.61,
+ "learning_rate": "1.8576e-04",
+ "loss": 0.9398,
+ "slid_loss": 0.8786,
+ "step": 1675,
+ "time": 13.2
+ },
+ {
+ "epoch": 1.61,
+ "learning_rate": "1.8573e-04",
+ "loss": 0.8629,
+ "slid_loss": 0.8781,
+ "step": 1676,
+ "time": 12.08
+ },
+ {
+ "epoch": 1.61,
+ "learning_rate": "1.8571e-04",
+ "loss": 0.9217,
+ "slid_loss": 0.8791,
+ "step": 1677,
+ "time": 13.48
+ },
+ {
+ "epoch": 1.61,
+ "learning_rate": "1.8569e-04",
+ "loss": 0.7856,
+ "slid_loss": 0.8779,
+ "step": 1678,
+ "time": 11.48
+ },
+ {
+ "epoch": 1.61,
+ "learning_rate": "1.8566e-04",
+ "loss": 0.8381,
+ "slid_loss": 0.8778,
+ "step": 1679,
+ "time": 12.18
+ },
+ {
+ "epoch": 1.61,
+ "learning_rate": "1.8564e-04",
+ "loss": 0.7912,
+ "slid_loss": 0.8776,
+ "step": 1680,
+ "time": 11.3
+ },
+ {
+ "epoch": 1.61,
+ "learning_rate": "1.8562e-04",
+ "loss": 0.7995,
+ "slid_loss": 0.8764,
+ "step": 1681,
+ "time": 13.67
+ },
+ {
+ "epoch": 1.62,
+ "learning_rate": "1.8559e-04",
+ "loss": 0.8343,
+ "slid_loss": 0.8748,
+ "step": 1682,
+ "time": 13.6
+ },
+ {
+ "epoch": 1.62,
+ "learning_rate": "1.8557e-04",
+ "loss": 0.8654,
+ "slid_loss": 0.874,
+ "step": 1683,
+ "time": 13.31
+ },
+ {
+ "epoch": 1.62,
+ "learning_rate": "1.8554e-04",
+ "loss": 0.8979,
+ "slid_loss": 0.8743,
+ "step": 1684,
+ "time": 12.49
+ },
+ {
+ "epoch": 1.62,
+ "learning_rate": "1.8552e-04",
+ "loss": 0.8969,
+ "slid_loss": 0.8752,
+ "step": 1685,
+ "time": 13.75
+ },
+ {
+ "epoch": 1.62,
+ "learning_rate": "1.8550e-04",
+ "loss": 0.8643,
+ "slid_loss": 0.8749,
+ "step": 1686,
+ "time": 13.34
+ },
+ {
+ "epoch": 1.62,
+ "learning_rate": "1.8547e-04",
+ "loss": 0.9115,
+ "slid_loss": 0.876,
+ "step": 1687,
+ "time": 13.54
+ },
+ {
+ "epoch": 1.62,
+ "learning_rate": "1.8545e-04",
+ "loss": 0.7537,
+ "slid_loss": 0.8742,
+ "step": 1688,
+ "time": 12.99
+ },
+ {
+ "epoch": 1.62,
+ "learning_rate": "1.8543e-04",
+ "loss": 0.834,
+ "slid_loss": 0.8726,
+ "step": 1689,
+ "time": 13.12
+ },
+ {
+ "epoch": 1.62,
+ "learning_rate": "1.8540e-04",
+ "loss": 0.7974,
+ "slid_loss": 0.8719,
+ "step": 1690,
+ "time": 12.74
+ },
+ {
+ "epoch": 1.62,
+ "learning_rate": "1.8538e-04",
+ "loss": 0.9127,
+ "slid_loss": 0.8713,
+ "step": 1691,
+ "time": 13.52
+ },
+ {
+ "epoch": 1.63,
+ "learning_rate": "1.8536e-04",
+ "loss": 0.8939,
+ "slid_loss": 0.8718,
+ "step": 1692,
+ "time": 11.4
+ },
+ {
+ "epoch": 1.63,
+ "learning_rate": "1.8533e-04",
+ "loss": 0.863,
+ "slid_loss": 0.8717,
+ "step": 1693,
+ "time": 13.52
+ },
+ {
+ "epoch": 1.63,
+ "learning_rate": "1.8531e-04",
+ "loss": 0.9842,
+ "slid_loss": 0.8728,
+ "step": 1694,
+ "time": 13.17
+ },
+ {
+ "epoch": 1.63,
+ "learning_rate": "1.8528e-04",
+ "loss": 0.8556,
+ "slid_loss": 0.872,
+ "step": 1695,
+ "time": 13.38
+ },
+ {
+ "epoch": 1.63,
+ "learning_rate": "1.8526e-04",
+ "loss": 0.8397,
+ "slid_loss": 0.8727,
+ "step": 1696,
+ "time": 13.26
+ },
+ {
+ "epoch": 1.63,
+ "learning_rate": "1.8524e-04",
+ "loss": 0.9243,
+ "slid_loss": 0.8739,
+ "step": 1697,
+ "time": 13.7
+ },
+ {
+ "epoch": 1.63,
+ "learning_rate": "1.8521e-04",
+ "loss": 0.9987,
+ "slid_loss": 0.8749,
+ "step": 1698,
+ "time": 13.79
+ },
+ {
+ "epoch": 1.63,
+ "learning_rate": "1.8519e-04",
+ "loss": 0.9109,
+ "slid_loss": 0.8753,
+ "step": 1699,
+ "time": 13.71
+ },
+ {
+ "epoch": 1.63,
+ "learning_rate": "1.8517e-04",
+ "loss": 0.7638,
+ "slid_loss": 0.8738,
+ "step": 1700,
+ "time": 11.29
+ },
+ {
+ "epoch": 1.63,
+ "learning_rate": "1.8514e-04",
+ "loss": 0.9244,
+ "slid_loss": 0.8736,
+ "step": 1701,
+ "time": 13.82
+ },
+ {
+ "epoch": 1.63,
+ "learning_rate": "1.8512e-04",
+ "loss": 0.8626,
+ "slid_loss": 0.8734,
+ "step": 1702,
+ "time": 11.57
+ },
+ {
+ "epoch": 1.64,
+ "learning_rate": "1.8509e-04",
+ "loss": 0.9134,
+ "slid_loss": 0.8723,
+ "step": 1703,
+ "time": 13.7
+ },
+ {
+ "epoch": 1.64,
+ "learning_rate": "1.8507e-04",
+ "loss": 0.8617,
+ "slid_loss": 0.8721,
+ "step": 1704,
+ "time": 13.46
+ },
+ {
+ "epoch": 1.64,
+ "learning_rate": "1.8505e-04",
+ "loss": 0.9085,
+ "slid_loss": 0.8717,
+ "step": 1705,
+ "time": 14.01
+ },
+ {
+ "epoch": 1.64,
+ "learning_rate": "1.8502e-04",
+ "loss": 0.8481,
+ "slid_loss": 0.8713,
+ "step": 1706,
+ "time": 14.04
+ },
+ {
+ "epoch": 1.64,
+ "learning_rate": "1.8500e-04",
+ "loss": 0.8989,
+ "slid_loss": 0.8717,
+ "step": 1707,
+ "time": 13.21
+ },
+ {
+ "epoch": 1.64,
+ "learning_rate": "1.8497e-04",
+ "loss": 0.8927,
+ "slid_loss": 0.871,
+ "step": 1708,
+ "time": 14.23
+ },
+ {
+ "epoch": 1.64,
+ "learning_rate": "1.8495e-04",
+ "loss": 0.7777,
+ "slid_loss": 0.8698,
+ "step": 1709,
+ "time": 12.36
+ },
+ {
+ "epoch": 1.64,
+ "learning_rate": "1.8493e-04",
+ "loss": 0.8989,
+ "slid_loss": 0.8702,
+ "step": 1710,
+ "time": 13.14
+ },
+ {
+ "epoch": 1.64,
+ "learning_rate": "1.8490e-04",
+ "loss": 0.8343,
+ "slid_loss": 0.8684,
+ "step": 1711,
+ "time": 10.76
+ },
+ {
+ "epoch": 1.64,
+ "learning_rate": "1.8488e-04",
+ "loss": 0.8807,
+ "slid_loss": 0.8692,
+ "step": 1712,
+ "time": 14.31
+ },
+ {
+ "epoch": 1.65,
+ "learning_rate": "1.8485e-04",
+ "loss": 0.8262,
+ "slid_loss": 0.8674,
+ "step": 1713,
+ "time": 13.11
+ },
+ {
+ "epoch": 1.65,
+ "learning_rate": "1.8483e-04",
+ "loss": 0.9448,
+ "slid_loss": 0.8687,
+ "step": 1714,
+ "time": 13.5
+ },
+ {
+ "epoch": 1.65,
+ "learning_rate": "1.8481e-04",
+ "loss": 0.8627,
+ "slid_loss": 0.8683,
+ "step": 1715,
+ "time": 13.46
+ },
+ {
+ "epoch": 1.65,
+ "learning_rate": "1.8478e-04",
+ "loss": 0.8639,
+ "slid_loss": 0.8692,
+ "step": 1716,
+ "time": 12.35
+ },
+ {
+ "epoch": 1.65,
+ "learning_rate": "1.8476e-04",
+ "loss": 0.8812,
+ "slid_loss": 0.869,
+ "step": 1717,
+ "time": 11.79
+ },
+ {
+ "epoch": 1.65,
+ "learning_rate": "1.8473e-04",
+ "loss": 0.7212,
+ "slid_loss": 0.8682,
+ "step": 1718,
+ "time": 11.13
+ },
+ {
+ "epoch": 1.65,
+ "learning_rate": "1.8471e-04",
+ "loss": 0.8801,
+ "slid_loss": 0.8685,
+ "step": 1719,
+ "time": 13.02
+ },
+ {
+ "epoch": 1.65,
+ "learning_rate": "1.8469e-04",
+ "loss": 0.8134,
+ "slid_loss": 0.869,
+ "step": 1720,
+ "time": 13.4
+ },
+ {
+ "epoch": 1.65,
+ "learning_rate": "1.8466e-04",
+ "loss": 0.8531,
+ "slid_loss": 0.8688,
+ "step": 1721,
+ "time": 14.14
+ },
+ {
+ "epoch": 1.65,
+ "learning_rate": "1.8464e-04",
+ "loss": 0.9186,
+ "slid_loss": 0.8685,
+ "step": 1722,
+ "time": 13.58
+ },
+ {
+ "epoch": 1.66,
+ "learning_rate": "1.8461e-04",
+ "loss": 0.8145,
+ "slid_loss": 0.8688,
+ "step": 1723,
+ "time": 11.04
+ },
+ {
+ "epoch": 1.66,
+ "learning_rate": "1.8459e-04",
+ "loss": 0.8877,
+ "slid_loss": 0.8695,
+ "step": 1724,
+ "time": 13.58
+ },
+ {
+ "epoch": 1.66,
+ "learning_rate": "1.8456e-04",
+ "loss": 0.872,
+ "slid_loss": 0.8697,
+ "step": 1725,
+ "time": 13.6
+ },
+ {
+ "epoch": 1.66,
+ "learning_rate": "1.8454e-04",
+ "loss": 0.9254,
+ "slid_loss": 0.8705,
+ "step": 1726,
+ "time": 12.78
+ },
+ {
+ "epoch": 1.66,
+ "learning_rate": "1.8452e-04",
+ "loss": 0.8998,
+ "slid_loss": 0.8708,
+ "step": 1727,
+ "time": 12.04
+ },
+ {
+ "epoch": 1.66,
+ "learning_rate": "1.8449e-04",
+ "loss": 0.8893,
+ "slid_loss": 0.8712,
+ "step": 1728,
+ "time": 11.42
+ },
+ {
+ "epoch": 1.66,
+ "learning_rate": "1.8447e-04",
+ "loss": 0.8752,
+ "slid_loss": 0.8711,
+ "step": 1729,
+ "time": 14.03
+ },
+ {
+ "epoch": 1.66,
+ "learning_rate": "1.8444e-04",
+ "loss": 0.8574,
+ "slid_loss": 0.8708,
+ "step": 1730,
+ "time": 13.53
+ },
+ {
+ "epoch": 1.66,
+ "learning_rate": "1.8442e-04",
+ "loss": 0.7861,
+ "slid_loss": 0.8697,
+ "step": 1731,
+ "time": 12.95
+ },
+ {
+ "epoch": 1.66,
+ "learning_rate": "1.8439e-04",
+ "loss": 0.8442,
+ "slid_loss": 0.8686,
+ "step": 1732,
+ "time": 14.15
+ },
+ {
+ "epoch": 1.66,
+ "learning_rate": "1.8437e-04",
+ "loss": 0.9104,
+ "slid_loss": 0.8694,
+ "step": 1733,
+ "time": 13.7
+ },
+ {
+ "epoch": 1.67,
+ "learning_rate": "1.8435e-04",
+ "loss": 0.8555,
+ "slid_loss": 0.8689,
+ "step": 1734,
+ "time": 13.5
+ },
+ {
+ "epoch": 1.67,
+ "learning_rate": "1.8432e-04",
+ "loss": 0.9634,
+ "slid_loss": 0.8695,
+ "step": 1735,
+ "time": 12.9
+ },
+ {
+ "epoch": 1.67,
+ "learning_rate": "1.8430e-04",
+ "loss": 0.8647,
+ "slid_loss": 0.8697,
+ "step": 1736,
+ "time": 12.25
+ },
+ {
+ "epoch": 1.67,
+ "learning_rate": "1.8427e-04",
+ "loss": 0.9067,
+ "slid_loss": 0.8705,
+ "step": 1737,
+ "time": 13.61
+ },
+ {
+ "epoch": 1.67,
+ "learning_rate": "1.8425e-04",
+ "loss": 0.9001,
+ "slid_loss": 0.8703,
+ "step": 1738,
+ "time": 13.16
+ },
+ {
+ "epoch": 1.67,
+ "learning_rate": "1.8422e-04",
+ "loss": 0.7648,
+ "slid_loss": 0.8684,
+ "step": 1739,
+ "time": 13.9
+ },
+ {
+ "epoch": 1.67,
+ "learning_rate": "1.8420e-04",
+ "loss": 0.8603,
+ "slid_loss": 0.8675,
+ "step": 1740,
+ "time": 12.73
+ },
+ {
+ "epoch": 1.67,
+ "learning_rate": "1.8417e-04",
+ "loss": 0.8236,
+ "slid_loss": 0.8668,
+ "step": 1741,
+ "time": 13.83
+ },
+ {
+ "epoch": 1.67,
+ "learning_rate": "1.8415e-04",
+ "loss": 0.8864,
+ "slid_loss": 0.8666,
+ "step": 1742,
+ "time": 13.61
+ },
+ {
+ "epoch": 1.67,
+ "learning_rate": "1.8413e-04",
+ "loss": 0.8332,
+ "slid_loss": 0.8664,
+ "step": 1743,
+ "time": 12.19
+ },
+ {
+ "epoch": 1.68,
+ "learning_rate": "1.8410e-04",
+ "loss": 0.8174,
+ "slid_loss": 0.8667,
+ "step": 1744,
+ "time": 13.5
+ },
+ {
+ "epoch": 1.68,
+ "learning_rate": "1.8408e-04",
+ "loss": 0.8142,
+ "slid_loss": 0.8649,
+ "step": 1745,
+ "time": 13.75
+ },
+ {
+ "epoch": 1.68,
+ "learning_rate": "1.8405e-04",
+ "loss": 0.9411,
+ "slid_loss": 0.8653,
+ "step": 1746,
+ "time": 13.99
+ },
+ {
+ "epoch": 1.68,
+ "learning_rate": "1.8403e-04",
+ "loss": 0.915,
+ "slid_loss": 0.8658,
+ "step": 1747,
+ "time": 12.93
+ },
+ {
+ "epoch": 1.68,
+ "learning_rate": "1.8400e-04",
+ "loss": 0.8585,
+ "slid_loss": 0.866,
+ "step": 1748,
+ "time": 13.43
+ },
+ {
+ "epoch": 1.68,
+ "learning_rate": "1.8398e-04",
+ "loss": 0.8574,
+ "slid_loss": 0.8666,
+ "step": 1749,
+ "time": 11.74
+ },
+ {
+ "epoch": 1.68,
+ "learning_rate": "1.8395e-04",
+ "loss": 0.7164,
+ "slid_loss": 0.8658,
+ "step": 1750,
+ "time": 12.9
+ },
+ {
+ "epoch": 1.68,
+ "learning_rate": "1.8393e-04",
+ "loss": 0.8656,
+ "slid_loss": 0.8667,
+ "step": 1751,
+ "time": 13.71
+ },
+ {
+ "epoch": 1.68,
+ "learning_rate": "1.8390e-04",
+ "loss": 0.8621,
+ "slid_loss": 0.8666,
+ "step": 1752,
+ "time": 13.45
+ },
+ {
+ "epoch": 1.68,
+ "learning_rate": "1.8388e-04",
+ "loss": 0.8397,
+ "slid_loss": 0.8671,
+ "step": 1753,
+ "time": 12.36
+ },
+ {
+ "epoch": 1.68,
+ "learning_rate": "1.8385e-04",
+ "loss": 0.9576,
+ "slid_loss": 0.8672,
+ "step": 1754,
+ "time": 13.58
+ },
+ {
+ "epoch": 1.69,
+ "learning_rate": "1.8383e-04",
+ "loss": 0.8619,
+ "slid_loss": 0.8684,
+ "step": 1755,
+ "time": 13.49
+ },
+ {
+ "epoch": 1.69,
+ "learning_rate": "1.8381e-04",
+ "loss": 0.9332,
+ "slid_loss": 0.8683,
+ "step": 1756,
+ "time": 13.35
+ },
+ {
+ "epoch": 1.69,
+ "learning_rate": "1.8378e-04",
+ "loss": 0.8423,
+ "slid_loss": 0.8679,
+ "step": 1757,
+ "time": 13.26
+ },
+ {
+ "epoch": 1.69,
+ "learning_rate": "1.8376e-04",
+ "loss": 0.8339,
+ "slid_loss": 0.8668,
+ "step": 1758,
+ "time": 14.07
+ },
+ {
+ "epoch": 1.69,
+ "learning_rate": "1.8373e-04",
+ "loss": 0.9393,
+ "slid_loss": 0.8675,
+ "step": 1759,
+ "time": 13.87
+ },
+ {
+ "epoch": 1.69,
+ "learning_rate": "1.8371e-04",
+ "loss": 0.8294,
+ "slid_loss": 0.8677,
+ "step": 1760,
+ "time": 11.34
+ },
+ {
+ "epoch": 1.69,
+ "learning_rate": "1.8368e-04",
+ "loss": 0.9034,
+ "slid_loss": 0.8683,
+ "step": 1761,
+ "time": 13.3
+ },
+ {
+ "epoch": 1.69,
+ "learning_rate": "1.8366e-04",
+ "loss": 0.7987,
+ "slid_loss": 0.8677,
+ "step": 1762,
+ "time": 13.5
+ },
+ {
+ "epoch": 1.69,
+ "learning_rate": "1.8363e-04",
+ "loss": 0.8401,
+ "slid_loss": 0.8672,
+ "step": 1763,
+ "time": 12.94
+ },
+ {
+ "epoch": 1.69,
+ "learning_rate": "1.8361e-04",
+ "loss": 0.8723,
+ "slid_loss": 0.8673,
+ "step": 1764,
+ "time": 12.75
+ },
+ {
+ "epoch": 1.7,
+ "learning_rate": "1.8358e-04",
+ "loss": 0.8458,
+ "slid_loss": 0.8667,
+ "step": 1765,
+ "time": 13.17
+ },
+ {
+ "epoch": 1.7,
+ "learning_rate": "1.8356e-04",
+ "loss": 0.8551,
+ "slid_loss": 0.8676,
+ "step": 1766,
+ "time": 13.9
+ },
+ {
+ "epoch": 1.7,
+ "learning_rate": "1.8353e-04",
+ "loss": 0.8023,
+ "slid_loss": 0.8658,
+ "step": 1767,
+ "time": 13.46
+ },
+ {
+ "epoch": 1.7,
+ "learning_rate": "1.8351e-04",
+ "loss": 0.835,
+ "slid_loss": 0.8658,
+ "step": 1768,
+ "time": 11.88
+ },
+ {
+ "epoch": 1.7,
+ "learning_rate": "1.8348e-04",
+ "loss": 0.9774,
+ "slid_loss": 0.8671,
+ "step": 1769,
+ "time": 12.77
+ },
+ {
+ "epoch": 1.7,
+ "learning_rate": "1.8346e-04",
+ "loss": 0.8232,
+ "slid_loss": 0.8651,
+ "step": 1770,
+ "time": 13.94
+ },
+ {
+ "epoch": 1.7,
+ "learning_rate": "1.8343e-04",
+ "loss": 0.8366,
+ "slid_loss": 0.8639,
+ "step": 1771,
+ "time": 13.43
+ },
+ {
+ "epoch": 1.7,
+ "learning_rate": "1.8341e-04",
+ "loss": 0.8329,
+ "slid_loss": 0.8636,
+ "step": 1772,
+ "time": 10.84
+ },
+ {
+ "epoch": 1.7,
+ "learning_rate": "1.8338e-04",
+ "loss": 0.8345,
+ "slid_loss": 0.8637,
+ "step": 1773,
+ "time": 13.95
+ },
+ {
+ "epoch": 1.7,
+ "learning_rate": "1.8336e-04",
+ "loss": 0.8691,
+ "slid_loss": 0.8644,
+ "step": 1774,
+ "time": 12.97
+ },
+ {
+ "epoch": 1.71,
+ "learning_rate": "1.8333e-04",
+ "loss": 0.8222,
+ "slid_loss": 0.8632,
+ "step": 1775,
+ "time": 13.77
+ },
+ {
+ "epoch": 1.71,
+ "learning_rate": "1.8331e-04",
+ "loss": 0.8095,
+ "slid_loss": 0.8627,
+ "step": 1776,
+ "time": 14.3
+ },
+ {
+ "epoch": 1.71,
+ "learning_rate": "1.8328e-04",
+ "loss": 0.8935,
+ "slid_loss": 0.8624,
+ "step": 1777,
+ "time": 13.75
+ },
+ {
+ "epoch": 1.71,
+ "learning_rate": "1.8326e-04",
+ "loss": 0.902,
+ "slid_loss": 0.8636,
+ "step": 1778,
+ "time": 13.11
+ },
+ {
+ "epoch": 1.71,
+ "learning_rate": "1.8323e-04",
+ "loss": 0.8801,
+ "slid_loss": 0.864,
+ "step": 1779,
+ "time": 11.65
+ },
+ {
+ "epoch": 1.71,
+ "learning_rate": "1.8321e-04",
+ "loss": 0.9188,
+ "slid_loss": 0.8653,
+ "step": 1780,
+ "time": 13.41
+ },
+ {
+ "epoch": 1.71,
+ "learning_rate": "1.8318e-04",
+ "loss": 0.8929,
+ "slid_loss": 0.8662,
+ "step": 1781,
+ "time": 13.97
+ },
+ {
+ "epoch": 1.71,
+ "learning_rate": "1.8316e-04",
+ "loss": 0.837,
+ "slid_loss": 0.8662,
+ "step": 1782,
+ "time": 12.83
+ },
+ {
+ "epoch": 1.71,
+ "learning_rate": "1.8313e-04",
+ "loss": 0.7895,
+ "slid_loss": 0.8655,
+ "step": 1783,
+ "time": 13.1
+ },
+ {
+ "epoch": 1.71,
+ "learning_rate": "1.8311e-04",
+ "loss": 0.8354,
+ "slid_loss": 0.8648,
+ "step": 1784,
+ "time": 13.27
+ },
+ {
+ "epoch": 1.71,
+ "learning_rate": "1.8308e-04",
+ "loss": 0.8624,
+ "slid_loss": 0.8645,
+ "step": 1785,
+ "time": 12.81
+ },
+ {
+ "epoch": 1.72,
+ "learning_rate": "1.8306e-04",
+ "loss": 0.7806,
+ "slid_loss": 0.8637,
+ "step": 1786,
+ "time": 12.92
+ },
+ {
+ "epoch": 1.72,
+ "learning_rate": "1.8303e-04",
+ "loss": 0.8302,
+ "slid_loss": 0.8629,
+ "step": 1787,
+ "time": 13.83
+ },
+ {
+ "epoch": 1.72,
+ "learning_rate": "1.8301e-04",
+ "loss": 0.8791,
+ "slid_loss": 0.8641,
+ "step": 1788,
+ "time": 11.45
+ },
+ {
+ "epoch": 1.72,
+ "learning_rate": "1.8298e-04",
+ "loss": 0.8754,
+ "slid_loss": 0.8645,
+ "step": 1789,
+ "time": 14.2
+ },
+ {
+ "epoch": 1.72,
+ "learning_rate": "1.8296e-04",
+ "loss": 0.8404,
+ "slid_loss": 0.865,
+ "step": 1790,
+ "time": 11.77
+ },
+ {
+ "epoch": 1.72,
+ "learning_rate": "1.8293e-04",
+ "loss": 0.7928,
+ "slid_loss": 0.8638,
+ "step": 1791,
+ "time": 13.19
+ },
+ {
+ "epoch": 1.72,
+ "learning_rate": "1.8291e-04",
+ "loss": 0.8119,
+ "slid_loss": 0.8629,
+ "step": 1792,
+ "time": 13.43
+ },
+ {
+ "epoch": 1.72,
+ "learning_rate": "1.8288e-04",
+ "loss": 0.8285,
+ "slid_loss": 0.8626,
+ "step": 1793,
+ "time": 13.36
+ },
+ {
+ "epoch": 1.72,
+ "learning_rate": "1.8286e-04",
+ "loss": 0.8777,
+ "slid_loss": 0.8615,
+ "step": 1794,
+ "time": 14.13
+ },
+ {
+ "epoch": 1.72,
+ "learning_rate": "1.8283e-04",
+ "loss": 0.8875,
+ "slid_loss": 0.8618,
+ "step": 1795,
+ "time": 12.82
+ },
+ {
+ "epoch": 1.73,
+ "learning_rate": "1.8281e-04",
+ "loss": 0.8827,
+ "slid_loss": 0.8623,
+ "step": 1796,
+ "time": 13.38
+ },
+ {
+ "epoch": 1.73,
+ "learning_rate": "1.8278e-04",
+ "loss": 0.9082,
+ "slid_loss": 0.8621,
+ "step": 1797,
+ "time": 13.75
+ },
+ {
+ "epoch": 1.73,
+ "learning_rate": "1.8275e-04",
+ "loss": 0.8735,
+ "slid_loss": 0.8609,
+ "step": 1798,
+ "time": 13.73
+ },
+ {
+ "epoch": 1.73,
+ "learning_rate": "1.8273e-04",
+ "loss": 0.9085,
+ "slid_loss": 0.8608,
+ "step": 1799,
+ "time": 13.59
+ },
+ {
+ "epoch": 1.73,
+ "learning_rate": "1.8270e-04",
+ "loss": 0.8658,
+ "slid_loss": 0.8619,
+ "step": 1800,
+ "time": 13.54
+ },
+ {
+ "epoch": 1.73,
+ "learning_rate": "1.8268e-04",
+ "loss": 0.8491,
+ "slid_loss": 0.8611,
+ "step": 1801,
+ "time": 13.01
+ },
+ {
+ "epoch": 1.73,
+ "learning_rate": "1.8265e-04",
+ "loss": 0.7872,
+ "slid_loss": 0.8604,
+ "step": 1802,
+ "time": 13.43
+ },
+ {
+ "epoch": 1.73,
+ "learning_rate": "1.8263e-04",
+ "loss": 0.7199,
+ "slid_loss": 0.8584,
+ "step": 1803,
+ "time": 13.35
+ },
+ {
+ "epoch": 1.73,
+ "learning_rate": "1.8260e-04",
+ "loss": 0.9142,
+ "slid_loss": 0.8589,
+ "step": 1804,
+ "time": 13.22
+ },
+ {
+ "epoch": 1.73,
+ "learning_rate": "1.8258e-04",
+ "loss": 0.9352,
+ "slid_loss": 0.8592,
+ "step": 1805,
+ "time": 11.36
+ },
+ {
+ "epoch": 1.73,
+ "learning_rate": "1.8255e-04",
+ "loss": 0.8022,
+ "slid_loss": 0.8587,
+ "step": 1806,
+ "time": 13.58
+ },
+ {
+ "epoch": 1.74,
+ "learning_rate": "1.8253e-04",
+ "loss": 0.8291,
+ "slid_loss": 0.8581,
+ "step": 1807,
+ "time": 15.22
+ },
+ {
+ "epoch": 1.74,
+ "learning_rate": "1.8250e-04",
+ "loss": 0.9208,
+ "slid_loss": 0.8583,
+ "step": 1808,
+ "time": 13.86
+ },
+ {
+ "epoch": 1.74,
+ "learning_rate": "1.8247e-04",
+ "loss": 0.8905,
+ "slid_loss": 0.8595,
+ "step": 1809,
+ "time": 12.11
+ },
+ {
+ "epoch": 1.74,
+ "learning_rate": "1.8245e-04",
+ "loss": 0.7648,
+ "slid_loss": 0.8581,
+ "step": 1810,
+ "time": 13.09
+ },
+ {
+ "epoch": 1.74,
+ "learning_rate": "1.8242e-04",
+ "loss": 0.8795,
+ "slid_loss": 0.8586,
+ "step": 1811,
+ "time": 13.86
+ },
+ {
+ "epoch": 1.74,
+ "learning_rate": "1.8240e-04",
+ "loss": 0.869,
+ "slid_loss": 0.8585,
+ "step": 1812,
+ "time": 13.88
+ },
+ {
+ "epoch": 1.74,
+ "learning_rate": "1.8237e-04",
+ "loss": 0.8434,
+ "slid_loss": 0.8586,
+ "step": 1813,
+ "time": 12.78
+ },
+ {
+ "epoch": 1.74,
+ "learning_rate": "1.8235e-04",
+ "loss": 0.8733,
+ "slid_loss": 0.8579,
+ "step": 1814,
+ "time": 13.93
+ },
+ {
+ "epoch": 1.74,
+ "learning_rate": "1.8232e-04",
+ "loss": 0.8318,
+ "slid_loss": 0.8576,
+ "step": 1815,
+ "time": 13.92
+ },
+ {
+ "epoch": 1.74,
+ "learning_rate": "1.8230e-04",
+ "loss": 0.8529,
+ "slid_loss": 0.8575,
+ "step": 1816,
+ "time": 13.88
+ },
+ {
+ "epoch": 1.75,
+ "learning_rate": "1.8227e-04",
+ "loss": 0.8967,
+ "slid_loss": 0.8576,
+ "step": 1817,
+ "time": 12.02
+ },
+ {
+ "epoch": 1.75,
+ "learning_rate": "1.8224e-04",
+ "loss": 0.8689,
+ "slid_loss": 0.8591,
+ "step": 1818,
+ "time": 12.4
+ },
+ {
+ "epoch": 1.75,
+ "learning_rate": "1.8222e-04",
+ "loss": 0.8187,
+ "slid_loss": 0.8585,
+ "step": 1819,
+ "time": 12.1
+ },
+ {
+ "epoch": 1.75,
+ "learning_rate": "1.8219e-04",
+ "loss": 0.9251,
+ "slid_loss": 0.8596,
+ "step": 1820,
+ "time": 13.55
+ },
+ {
+ "epoch": 1.75,
+ "learning_rate": "1.8217e-04",
+ "loss": 0.7818,
+ "slid_loss": 0.8589,
+ "step": 1821,
+ "time": 13.58
+ },
+ {
+ "epoch": 1.75,
+ "learning_rate": "1.8214e-04",
+ "loss": 0.8169,
+ "slid_loss": 0.8579,
+ "step": 1822,
+ "time": 13.59
+ },
+ {
+ "epoch": 1.75,
+ "learning_rate": "1.8212e-04",
+ "loss": 0.8767,
+ "slid_loss": 0.8585,
+ "step": 1823,
+ "time": 12.17
+ },
+ {
+ "epoch": 1.75,
+ "learning_rate": "1.8209e-04",
+ "loss": 0.7161,
+ "slid_loss": 0.8568,
+ "step": 1824,
+ "time": 11.85
+ },
+ {
+ "epoch": 1.75,
+ "learning_rate": "1.8207e-04",
+ "loss": 0.8245,
+ "slid_loss": 0.8563,
+ "step": 1825,
+ "time": 12.41
+ },
+ {
+ "epoch": 1.75,
+ "learning_rate": "1.8204e-04",
+ "loss": 0.8785,
+ "slid_loss": 0.8559,
+ "step": 1826,
+ "time": 14.05
+ },
+ {
+ "epoch": 1.76,
+ "learning_rate": "1.8201e-04",
+ "loss": 0.7711,
+ "slid_loss": 0.8546,
+ "step": 1827,
+ "time": 14.38
+ },
+ {
+ "epoch": 1.76,
+ "learning_rate": "1.8199e-04",
+ "loss": 0.8264,
+ "slid_loss": 0.8539,
+ "step": 1828,
+ "time": 13.79
+ },
+ {
+ "epoch": 1.76,
+ "learning_rate": "1.8196e-04",
+ "loss": 0.8566,
+ "slid_loss": 0.8538,
+ "step": 1829,
+ "time": 13.01
+ },
+ {
+ "epoch": 1.76,
+ "learning_rate": "1.8194e-04",
+ "loss": 0.8851,
+ "slid_loss": 0.854,
+ "step": 1830,
+ "time": 11.95
+ },
+ {
+ "epoch": 1.76,
+ "learning_rate": "1.8191e-04",
+ "loss": 0.8069,
+ "slid_loss": 0.8542,
+ "step": 1831,
+ "time": 11.2
+ },
+ {
+ "epoch": 1.76,
+ "learning_rate": "1.8188e-04",
+ "loss": 0.8387,
+ "slid_loss": 0.8542,
+ "step": 1832,
+ "time": 13.93
+ },
+ {
+ "epoch": 1.76,
+ "learning_rate": "1.8186e-04",
+ "loss": 0.7812,
+ "slid_loss": 0.8529,
+ "step": 1833,
+ "time": 11.98
+ },
+ {
+ "epoch": 1.76,
+ "learning_rate": "1.8183e-04",
+ "loss": 0.8376,
+ "slid_loss": 0.8527,
+ "step": 1834,
+ "time": 13.66
+ },
+ {
+ "epoch": 1.76,
+ "learning_rate": "1.8181e-04",
+ "loss": 0.9116,
+ "slid_loss": 0.8522,
+ "step": 1835,
+ "time": 13.73
+ },
+ {
+ "epoch": 1.76,
+ "learning_rate": "1.8178e-04",
+ "loss": 0.9657,
+ "slid_loss": 0.8532,
+ "step": 1836,
+ "time": 12.8
+ },
+ {
+ "epoch": 1.76,
+ "learning_rate": "1.8176e-04",
+ "loss": 0.9019,
+ "slid_loss": 0.8532,
+ "step": 1837,
+ "time": 13.46
+ },
+ {
+ "epoch": 1.77,
+ "learning_rate": "1.8173e-04",
+ "loss": 1.1192,
+ "slid_loss": 0.8554,
+ "step": 1838,
+ "time": 12.55
+ },
+ {
+ "epoch": 1.77,
+ "learning_rate": "1.8170e-04",
+ "loss": 0.8569,
+ "slid_loss": 0.8563,
+ "step": 1839,
+ "time": 13.83
+ },
+ {
+ "epoch": 1.77,
+ "learning_rate": "1.8168e-04",
+ "loss": 0.8324,
+ "slid_loss": 0.856,
+ "step": 1840,
+ "time": 12.38
+ },
+ {
+ "epoch": 1.77,
+ "learning_rate": "1.8165e-04",
+ "loss": 0.832,
+ "slid_loss": 0.8561,
+ "step": 1841,
+ "time": 12.03
+ },
+ {
+ "epoch": 1.77,
+ "learning_rate": "1.8163e-04",
+ "loss": 0.9014,
+ "slid_loss": 0.8562,
+ "step": 1842,
+ "time": 11.96
+ },
+ {
+ "epoch": 1.77,
+ "learning_rate": "1.8160e-04",
+ "loss": 0.7229,
+ "slid_loss": 0.8551,
+ "step": 1843,
+ "time": 13.64
+ },
+ {
+ "epoch": 1.77,
+ "learning_rate": "1.8157e-04",
+ "loss": 0.8013,
+ "slid_loss": 0.855,
+ "step": 1844,
+ "time": 13.78
+ },
+ {
+ "epoch": 1.77,
+ "learning_rate": "1.8155e-04",
+ "loss": 0.8193,
+ "slid_loss": 0.855,
+ "step": 1845,
+ "time": 12.07
+ },
+ {
+ "epoch": 1.77,
+ "learning_rate": "1.8152e-04",
+ "loss": 0.8283,
+ "slid_loss": 0.8539,
+ "step": 1846,
+ "time": 12.31
+ },
+ {
+ "epoch": 1.77,
+ "learning_rate": "1.8150e-04",
+ "loss": 0.8036,
+ "slid_loss": 0.8528,
+ "step": 1847,
+ "time": 14.03
+ },
+ {
+ "epoch": 1.78,
+ "learning_rate": "1.8147e-04",
+ "loss": 0.7926,
+ "slid_loss": 0.8521,
+ "step": 1848,
+ "time": 13.89
+ },
+ {
+ "epoch": 1.78,
+ "learning_rate": "1.8144e-04",
+ "loss": 0.8552,
+ "slid_loss": 0.8521,
+ "step": 1849,
+ "time": 13.71
+ },
+ {
+ "epoch": 1.78,
+ "learning_rate": "1.8142e-04",
+ "loss": 0.8418,
+ "slid_loss": 0.8533,
+ "step": 1850,
+ "time": 13.9
+ },
+ {
+ "epoch": 1.78,
+ "learning_rate": "1.8139e-04",
+ "loss": 0.8412,
+ "slid_loss": 0.8531,
+ "step": 1851,
+ "time": 13.14
+ },
+ {
+ "epoch": 1.78,
+ "learning_rate": "1.8137e-04",
+ "loss": 0.8781,
+ "slid_loss": 0.8533,
+ "step": 1852,
+ "time": 13.61
+ },
+ {
+ "epoch": 1.78,
+ "learning_rate": "1.8134e-04",
+ "loss": 0.9458,
+ "slid_loss": 0.8543,
+ "step": 1853,
+ "time": 14.18
+ },
+ {
+ "epoch": 1.78,
+ "learning_rate": "1.8131e-04",
+ "loss": 0.7624,
+ "slid_loss": 0.8524,
+ "step": 1854,
+ "time": 13.67
+ },
+ {
+ "epoch": 1.78,
+ "learning_rate": "1.8129e-04",
+ "loss": 0.8608,
+ "slid_loss": 0.8524,
+ "step": 1855,
+ "time": 12.88
+ },
+ {
+ "epoch": 1.78,
+ "learning_rate": "1.8126e-04",
+ "loss": 0.8348,
+ "slid_loss": 0.8514,
+ "step": 1856,
+ "time": 13.09
+ },
+ {
+ "epoch": 1.78,
+ "learning_rate": "1.8123e-04",
+ "loss": 0.7263,
+ "slid_loss": 0.8502,
+ "step": 1857,
+ "time": 13.75
+ },
+ {
+ "epoch": 1.78,
+ "learning_rate": "1.8121e-04",
+ "loss": 0.8047,
+ "slid_loss": 0.8499,
+ "step": 1858,
+ "time": 13.28
+ },
+ {
+ "epoch": 1.79,
+ "learning_rate": "1.8118e-04",
+ "loss": 0.8424,
+ "slid_loss": 0.849,
+ "step": 1859,
+ "time": 13.67
+ },
+ {
+ "epoch": 1.79,
+ "learning_rate": "1.8116e-04",
+ "loss": 0.7473,
+ "slid_loss": 0.8481,
+ "step": 1860,
+ "time": 11.47
+ },
+ {
+ "epoch": 1.79,
+ "learning_rate": "1.8113e-04",
+ "loss": 0.8913,
+ "slid_loss": 0.848,
+ "step": 1861,
+ "time": 13.85
+ },
+ {
+ "epoch": 1.79,
+ "learning_rate": "1.8110e-04",
+ "loss": 0.8397,
+ "slid_loss": 0.8484,
+ "step": 1862,
+ "time": 13.45
+ },
+ {
+ "epoch": 1.79,
+ "learning_rate": "1.8108e-04",
+ "loss": 0.7902,
+ "slid_loss": 0.8479,
+ "step": 1863,
+ "time": 12.14
+ },
+ {
+ "epoch": 1.79,
+ "learning_rate": "1.8105e-04",
+ "loss": 0.8632,
+ "slid_loss": 0.8478,
+ "step": 1864,
+ "time": 13.27
+ },
+ {
+ "epoch": 1.79,
+ "learning_rate": "1.8102e-04",
+ "loss": 0.885,
+ "slid_loss": 0.8482,
+ "step": 1865,
+ "time": 13.35
+ },
+ {
+ "epoch": 1.79,
+ "learning_rate": "1.8100e-04",
+ "loss": 0.8427,
+ "slid_loss": 0.8481,
+ "step": 1866,
+ "time": 13.45
+ },
+ {
+ "epoch": 1.79,
+ "learning_rate": "1.8097e-04",
+ "loss": 0.8384,
+ "slid_loss": 0.8485,
+ "step": 1867,
+ "time": 14.14
+ },
+ {
+ "epoch": 1.79,
+ "learning_rate": "1.8095e-04",
+ "loss": 0.8286,
+ "slid_loss": 0.8484,
+ "step": 1868,
+ "time": 13.5
+ },
+ {
+ "epoch": 1.8,
+ "learning_rate": "1.8092e-04",
+ "loss": 0.818,
+ "slid_loss": 0.8468,
+ "step": 1869,
+ "time": 14.14
+ },
+ {
+ "epoch": 1.8,
+ "learning_rate": "1.8089e-04",
+ "loss": 0.8532,
+ "slid_loss": 0.8471,
+ "step": 1870,
+ "time": 13.24
+ },
+ {
+ "epoch": 1.8,
+ "learning_rate": "1.8087e-04",
+ "loss": 0.884,
+ "slid_loss": 0.8476,
+ "step": 1871,
+ "time": 12.87
+ },
+ {
+ "epoch": 1.8,
+ "learning_rate": "1.8084e-04",
+ "loss": 0.948,
+ "slid_loss": 0.8487,
+ "step": 1872,
+ "time": 13.13
+ },
+ {
+ "epoch": 1.8,
+ "learning_rate": "1.8081e-04",
+ "loss": 0.8873,
+ "slid_loss": 0.8493,
+ "step": 1873,
+ "time": 13.41
+ },
+ {
+ "epoch": 1.8,
+ "learning_rate": "1.8079e-04",
+ "loss": 0.7586,
+ "slid_loss": 0.8482,
+ "step": 1874,
+ "time": 14.2
+ },
+ {
+ "epoch": 1.8,
+ "learning_rate": "1.8076e-04",
+ "loss": 0.8074,
+ "slid_loss": 0.848,
+ "step": 1875,
+ "time": 12.28
+ },
+ {
+ "epoch": 1.8,
+ "learning_rate": "1.8073e-04",
+ "loss": 0.8166,
+ "slid_loss": 0.8481,
+ "step": 1876,
+ "time": 12.66
+ },
+ {
+ "epoch": 1.8,
+ "learning_rate": "1.8071e-04",
+ "loss": 0.859,
+ "slid_loss": 0.8477,
+ "step": 1877,
+ "time": 13.13
+ },
+ {
+ "epoch": 1.8,
+ "learning_rate": "1.8068e-04",
+ "loss": 0.8403,
+ "slid_loss": 0.8471,
+ "step": 1878,
+ "time": 13.42
+ },
+ {
+ "epoch": 1.8,
+ "learning_rate": "1.8065e-04",
+ "loss": 0.8395,
+ "slid_loss": 0.8467,
+ "step": 1879,
+ "time": 13.2
+ },
+ {
+ "epoch": 1.81,
+ "learning_rate": "1.8063e-04",
+ "loss": 0.8476,
+ "slid_loss": 0.846,
+ "step": 1880,
+ "time": 13.59
+ },
+ {
+ "epoch": 1.81,
+ "learning_rate": "1.8060e-04",
+ "loss": 0.9457,
+ "slid_loss": 0.8465,
+ "step": 1881,
+ "time": 13.65
+ },
+ {
+ "epoch": 1.81,
+ "learning_rate": "1.8058e-04",
+ "loss": 0.8541,
+ "slid_loss": 0.8467,
+ "step": 1882,
+ "time": 13.57
+ },
+ {
+ "epoch": 1.81,
+ "learning_rate": "1.8055e-04",
+ "loss": 0.9112,
+ "slid_loss": 0.8479,
+ "step": 1883,
+ "time": 13.72
+ },
+ {
+ "epoch": 1.81,
+ "learning_rate": "1.8052e-04",
+ "loss": 0.9136,
+ "slid_loss": 0.8487,
+ "step": 1884,
+ "time": 13.79
+ },
+ {
+ "epoch": 1.81,
+ "learning_rate": "1.8050e-04",
+ "loss": 0.8605,
+ "slid_loss": 0.8487,
+ "step": 1885,
+ "time": 13.37
+ },
+ {
+ "epoch": 1.81,
+ "learning_rate": "1.8047e-04",
+ "loss": 0.8005,
+ "slid_loss": 0.8489,
+ "step": 1886,
+ "time": 14.02
+ },
+ {
+ "epoch": 1.81,
+ "learning_rate": "1.8044e-04",
+ "loss": 0.8628,
+ "slid_loss": 0.8492,
+ "step": 1887,
+ "time": 13.93
+ },
+ {
+ "epoch": 1.81,
+ "learning_rate": "1.8042e-04",
+ "loss": 0.8337,
+ "slid_loss": 0.8487,
+ "step": 1888,
+ "time": 14.1
+ },
+ {
+ "epoch": 1.81,
+ "learning_rate": "1.8039e-04",
+ "loss": 0.7802,
+ "slid_loss": 0.8478,
+ "step": 1889,
+ "time": 13.56
+ },
+ {
+ "epoch": 1.82,
+ "learning_rate": "1.8036e-04",
+ "loss": 0.8708,
+ "slid_loss": 0.8481,
+ "step": 1890,
+ "time": 12.79
+ },
+ {
+ "epoch": 1.82,
+ "learning_rate": "1.8034e-04",
+ "loss": 0.8029,
+ "slid_loss": 0.8482,
+ "step": 1891,
+ "time": 11.35
+ },
+ {
+ "epoch": 1.82,
+ "learning_rate": "1.8031e-04",
+ "loss": 0.8688,
+ "slid_loss": 0.8488,
+ "step": 1892,
+ "time": 13.55
+ },
+ {
+ "epoch": 1.82,
+ "learning_rate": "1.8028e-04",
+ "loss": 0.8033,
+ "slid_loss": 0.8485,
+ "step": 1893,
+ "time": 13.21
+ },
+ {
+ "epoch": 1.82,
+ "learning_rate": "1.8026e-04",
+ "loss": 0.7882,
+ "slid_loss": 0.8476,
+ "step": 1894,
+ "time": 13.24
+ },
+ {
+ "epoch": 1.82,
+ "learning_rate": "1.8023e-04",
+ "loss": 0.8322,
+ "slid_loss": 0.8471,
+ "step": 1895,
+ "time": 13.97
+ },
+ {
+ "epoch": 1.82,
+ "learning_rate": "1.8020e-04",
+ "loss": 0.7679,
+ "slid_loss": 0.8459,
+ "step": 1896,
+ "time": 13.53
+ },
+ {
+ "epoch": 1.82,
+ "learning_rate": "1.8018e-04",
+ "loss": 0.7792,
+ "slid_loss": 0.8446,
+ "step": 1897,
+ "time": 13.79
+ },
+ {
+ "epoch": 1.82,
+ "learning_rate": "1.8015e-04",
+ "loss": 0.7793,
+ "slid_loss": 0.8437,
+ "step": 1898,
+ "time": 13.45
+ },
+ {
+ "epoch": 1.82,
+ "learning_rate": "1.8012e-04",
+ "loss": 0.8488,
+ "slid_loss": 0.8431,
+ "step": 1899,
+ "time": 13.25
+ },
+ {
+ "epoch": 1.83,
+ "learning_rate": "1.8010e-04",
+ "loss": 0.9309,
+ "slid_loss": 0.8437,
+ "step": 1900,
+ "time": 12.85
+ },
+ {
+ "epoch": 1.83,
+ "learning_rate": "1.8007e-04",
+ "loss": 0.8012,
+ "slid_loss": 0.8433,
+ "step": 1901,
+ "time": 13.97
+ },
+ {
+ "epoch": 1.83,
+ "learning_rate": "1.8004e-04",
+ "loss": 0.8725,
+ "slid_loss": 0.8441,
+ "step": 1902,
+ "time": 14.26
+ },
+ {
+ "epoch": 1.83,
+ "learning_rate": "1.8002e-04",
+ "loss": 0.8714,
+ "slid_loss": 0.8456,
+ "step": 1903,
+ "time": 13.31
+ },
+ {
+ "epoch": 1.83,
+ "learning_rate": "1.7999e-04",
+ "loss": 0.8344,
+ "slid_loss": 0.8448,
+ "step": 1904,
+ "time": 13.72
+ },
+ {
+ "epoch": 1.83,
+ "learning_rate": "1.7996e-04",
+ "loss": 0.7972,
+ "slid_loss": 0.8434,
+ "step": 1905,
+ "time": 13.79
+ },
+ {
+ "epoch": 1.83,
+ "learning_rate": "1.7993e-04",
+ "loss": 0.8698,
+ "slid_loss": 0.8441,
+ "step": 1906,
+ "time": 13.69
+ },
+ {
+ "epoch": 1.83,
+ "learning_rate": "1.7991e-04",
+ "loss": 0.8639,
+ "slid_loss": 0.8445,
+ "step": 1907,
+ "time": 13.37
+ },
+ {
+ "epoch": 1.83,
+ "learning_rate": "1.7988e-04",
+ "loss": 0.8293,
+ "slid_loss": 0.8436,
+ "step": 1908,
+ "time": 10.93
+ },
+ {
+ "epoch": 1.83,
+ "learning_rate": "1.7985e-04",
+ "loss": 0.8096,
+ "slid_loss": 0.8427,
+ "step": 1909,
+ "time": 14.03
+ },
+ {
+ "epoch": 1.83,
+ "learning_rate": "1.7983e-04",
+ "loss": 0.798,
+ "slid_loss": 0.8431,
+ "step": 1910,
+ "time": 12.91
+ },
+ {
+ "epoch": 1.84,
+ "learning_rate": "1.7980e-04",
+ "loss": 0.8463,
+ "slid_loss": 0.8427,
+ "step": 1911,
+ "time": 12.9
+ },
+ {
+ "epoch": 1.84,
+ "learning_rate": "1.7977e-04",
+ "loss": 0.7989,
+ "slid_loss": 0.842,
+ "step": 1912,
+ "time": 13.76
+ },
+ {
+ "epoch": 1.84,
+ "learning_rate": "1.7975e-04",
+ "loss": 0.8295,
+ "slid_loss": 0.8419,
+ "step": 1913,
+ "time": 14.01
+ },
+ {
+ "epoch": 1.84,
+ "learning_rate": "1.7972e-04",
+ "loss": 0.7834,
+ "slid_loss": 0.841,
+ "step": 1914,
+ "time": 13.66
+ },
+ {
+ "epoch": 1.84,
+ "learning_rate": "1.7969e-04",
+ "loss": 0.7744,
+ "slid_loss": 0.8404,
+ "step": 1915,
+ "time": 12.48
+ },
+ {
+ "epoch": 1.84,
+ "learning_rate": "1.7967e-04",
+ "loss": 0.9124,
+ "slid_loss": 0.841,
+ "step": 1916,
+ "time": 12.98
+ },
+ {
+ "epoch": 1.84,
+ "learning_rate": "1.7964e-04",
+ "loss": 0.8509,
+ "slid_loss": 0.8406,
+ "step": 1917,
+ "time": 12.44
+ },
+ {
+ "epoch": 1.84,
+ "learning_rate": "1.7961e-04",
+ "loss": 0.8908,
+ "slid_loss": 0.8408,
+ "step": 1918,
+ "time": 13.23
+ },
+ {
+ "epoch": 1.84,
+ "learning_rate": "1.7958e-04",
+ "loss": 0.8188,
+ "slid_loss": 0.8408,
+ "step": 1919,
+ "time": 13.56
+ },
+ {
+ "epoch": 1.84,
+ "learning_rate": "1.7956e-04",
+ "loss": 0.8608,
+ "slid_loss": 0.8401,
+ "step": 1920,
+ "time": 11.38
+ },
+ {
+ "epoch": 1.85,
+ "learning_rate": "1.7953e-04",
+ "loss": 0.8698,
+ "slid_loss": 0.841,
+ "step": 1921,
+ "time": 11.74
+ },
+ {
+ "epoch": 1.85,
+ "learning_rate": "1.7950e-04",
+ "loss": 0.8261,
+ "slid_loss": 0.8411,
+ "step": 1922,
+ "time": 13.43
+ },
+ {
+ "epoch": 1.85,
+ "learning_rate": "1.7948e-04",
+ "loss": 0.9124,
+ "slid_loss": 0.8415,
+ "step": 1923,
+ "time": 13.34
+ },
+ {
+ "epoch": 1.85,
+ "learning_rate": "1.7945e-04",
+ "loss": 0.7571,
+ "slid_loss": 0.8419,
+ "step": 1924,
+ "time": 13.98
+ },
+ {
+ "epoch": 1.85,
+ "learning_rate": "1.7942e-04",
+ "loss": 0.7411,
+ "slid_loss": 0.8411,
+ "step": 1925,
+ "time": 13.39
+ },
+ {
+ "epoch": 1.85,
+ "learning_rate": "1.7939e-04",
+ "loss": 0.8096,
+ "slid_loss": 0.8404,
+ "step": 1926,
+ "time": 11.23
+ },
+ {
+ "epoch": 1.85,
+ "learning_rate": "1.7937e-04",
+ "loss": 0.9292,
+ "slid_loss": 0.8419,
+ "step": 1927,
+ "time": 13.38
+ },
+ {
+ "epoch": 1.85,
+ "learning_rate": "1.7934e-04",
+ "loss": 0.86,
+ "slid_loss": 0.8423,
+ "step": 1928,
+ "time": 13.55
+ },
+ {
+ "epoch": 1.85,
+ "learning_rate": "1.7931e-04",
+ "loss": 0.8202,
+ "slid_loss": 0.8419,
+ "step": 1929,
+ "time": 11.31
+ },
+ {
+ "epoch": 1.85,
+ "learning_rate": "1.7929e-04",
+ "loss": 0.8622,
+ "slid_loss": 0.8417,
+ "step": 1930,
+ "time": 13.39
+ },
+ {
+ "epoch": 1.85,
+ "learning_rate": "1.7926e-04",
+ "loss": 0.888,
+ "slid_loss": 0.8425,
+ "step": 1931,
+ "time": 12.26
+ },
+ {
+ "epoch": 1.86,
+ "learning_rate": "1.7923e-04",
+ "loss": 0.8174,
+ "slid_loss": 0.8423,
+ "step": 1932,
+ "time": 11.9
+ },
+ {
+ "epoch": 1.86,
+ "learning_rate": "1.7920e-04",
+ "loss": 0.8287,
+ "slid_loss": 0.8428,
+ "step": 1933,
+ "time": 13.22
+ },
+ {
+ "epoch": 1.86,
+ "learning_rate": "1.7918e-04",
+ "loss": 0.8023,
+ "slid_loss": 0.8424,
+ "step": 1934,
+ "time": 11.61
+ },
+ {
+ "epoch": 1.86,
+ "learning_rate": "1.7915e-04",
+ "loss": 0.8942,
+ "slid_loss": 0.8422,
+ "step": 1935,
+ "time": 12.77
+ },
+ {
+ "epoch": 1.86,
+ "learning_rate": "1.7912e-04",
+ "loss": 0.8124,
+ "slid_loss": 0.8407,
+ "step": 1936,
+ "time": 13.92
+ },
+ {
+ "epoch": 1.86,
+ "learning_rate": "1.7910e-04",
+ "loss": 0.9002,
+ "slid_loss": 0.8407,
+ "step": 1937,
+ "time": 13.84
+ },
+ {
+ "epoch": 1.86,
+ "learning_rate": "1.7907e-04",
+ "loss": 0.8668,
+ "slid_loss": 0.8382,
+ "step": 1938,
+ "time": 13.47
+ },
+ {
+ "epoch": 1.86,
+ "learning_rate": "1.7904e-04",
+ "loss": 0.8447,
+ "slid_loss": 0.838,
+ "step": 1939,
+ "time": 13.02
+ },
+ {
+ "epoch": 1.86,
+ "learning_rate": "1.7901e-04",
+ "loss": 0.7282,
+ "slid_loss": 0.837,
+ "step": 1940,
+ "time": 12.18
+ },
+ {
+ "epoch": 1.86,
+ "learning_rate": "1.7899e-04",
+ "loss": 0.852,
+ "slid_loss": 0.8372,
+ "step": 1941,
+ "time": 14.17
+ },
+ {
+ "epoch": 1.87,
+ "learning_rate": "1.7896e-04",
+ "loss": 0.8132,
+ "slid_loss": 0.8363,
+ "step": 1942,
+ "time": 13.73
+ },
+ {
+ "epoch": 1.87,
+ "learning_rate": "1.7893e-04",
+ "loss": 0.775,
+ "slid_loss": 0.8368,
+ "step": 1943,
+ "time": 13.36
+ },
+ {
+ "epoch": 1.87,
+ "learning_rate": "1.7890e-04",
+ "loss": 0.8363,
+ "slid_loss": 0.8372,
+ "step": 1944,
+ "time": 13.6
+ },
+ {
+ "epoch": 1.87,
+ "learning_rate": "1.7888e-04",
+ "loss": 0.8023,
+ "slid_loss": 0.837,
+ "step": 1945,
+ "time": 13.38
+ },
+ {
+ "epoch": 1.87,
+ "learning_rate": "1.7885e-04",
+ "loss": 0.8265,
+ "slid_loss": 0.837,
+ "step": 1946,
+ "time": 13.34
+ },
+ {
+ "epoch": 1.87,
+ "learning_rate": "1.7882e-04",
+ "loss": 0.8685,
+ "slid_loss": 0.8376,
+ "step": 1947,
+ "time": 13.74
+ },
+ {
+ "epoch": 1.87,
+ "learning_rate": "1.7879e-04",
+ "loss": 0.8309,
+ "slid_loss": 0.838,
+ "step": 1948,
+ "time": 13.12
+ },
+ {
+ "epoch": 1.87,
+ "learning_rate": "1.7877e-04",
+ "loss": 0.8025,
+ "slid_loss": 0.8375,
+ "step": 1949,
+ "time": 13.72
+ },
+ {
+ "epoch": 1.87,
+ "learning_rate": "1.7874e-04",
+ "loss": 0.8972,
+ "slid_loss": 0.8381,
+ "step": 1950,
+ "time": 13.99
+ },
+ {
+ "epoch": 1.87,
+ "learning_rate": "1.7871e-04",
+ "loss": 0.8778,
+ "slid_loss": 0.8384,
+ "step": 1951,
+ "time": 13.47
+ },
+ {
+ "epoch": 1.88,
+ "learning_rate": "1.7868e-04",
+ "loss": 0.7979,
+ "slid_loss": 0.8376,
+ "step": 1952,
+ "time": 13.34
+ },
+ {
+ "epoch": 1.88,
+ "learning_rate": "1.7866e-04",
+ "loss": 0.8233,
+ "slid_loss": 0.8364,
+ "step": 1953,
+ "time": 14.73
+ },
+ {
+ "epoch": 1.88,
+ "learning_rate": "1.7863e-04",
+ "loss": 0.83,
+ "slid_loss": 0.8371,
+ "step": 1954,
+ "time": 13.48
+ },
+ {
+ "epoch": 1.88,
+ "learning_rate": "1.7860e-04",
+ "loss": 0.7811,
+ "slid_loss": 0.8363,
+ "step": 1955,
+ "time": 14.49
+ },
+ {
+ "epoch": 1.88,
+ "learning_rate": "1.7857e-04",
+ "loss": 0.7307,
+ "slid_loss": 0.8352,
+ "step": 1956,
+ "time": 13.16
+ },
+ {
+ "epoch": 1.88,
+ "learning_rate": "1.7855e-04",
+ "loss": 0.7403,
+ "slid_loss": 0.8354,
+ "step": 1957,
+ "time": 14.08
+ },
+ {
+ "epoch": 1.88,
+ "learning_rate": "1.7852e-04",
+ "loss": 0.7493,
+ "slid_loss": 0.8348,
+ "step": 1958,
+ "time": 12.25
+ },
+ {
+ "epoch": 1.88,
+ "learning_rate": "1.7849e-04",
+ "loss": 0.7569,
+ "slid_loss": 0.834,
+ "step": 1959,
+ "time": 13.36
+ },
+ {
+ "epoch": 1.88,
+ "learning_rate": "1.7846e-04",
+ "loss": 0.867,
+ "slid_loss": 0.8352,
+ "step": 1960,
+ "time": 13.45
+ },
+ {
+ "epoch": 1.88,
+ "learning_rate": "1.7844e-04",
+ "loss": 0.7446,
+ "slid_loss": 0.8337,
+ "step": 1961,
+ "time": 13.24
+ },
+ {
+ "epoch": 1.88,
+ "learning_rate": "1.7841e-04",
+ "loss": 0.911,
+ "slid_loss": 0.8344,
+ "step": 1962,
+ "time": 13.4
+ },
+ {
+ "epoch": 1.89,
+ "learning_rate": "1.7838e-04",
+ "loss": 0.8159,
+ "slid_loss": 0.8347,
+ "step": 1963,
+ "time": 12.22
+ },
+ {
+ "epoch": 1.89,
+ "learning_rate": "1.7835e-04",
+ "loss": 0.7831,
+ "slid_loss": 0.8339,
+ "step": 1964,
+ "time": 13.83
+ },
+ {
+ "epoch": 1.89,
+ "learning_rate": "1.7833e-04",
+ "loss": 0.7058,
+ "slid_loss": 0.8321,
+ "step": 1965,
+ "time": 13.42
+ },
+ {
+ "epoch": 1.89,
+ "learning_rate": "1.7830e-04",
+ "loss": 0.8786,
+ "slid_loss": 0.8324,
+ "step": 1966,
+ "time": 13.61
+ },
+ {
+ "epoch": 1.89,
+ "learning_rate": "1.7827e-04",
+ "loss": 0.8752,
+ "slid_loss": 0.8328,
+ "step": 1967,
+ "time": 13.48
+ },
+ {
+ "epoch": 1.89,
+ "learning_rate": "1.7824e-04",
+ "loss": 0.7464,
+ "slid_loss": 0.832,
+ "step": 1968,
+ "time": 13.89
+ },
+ {
+ "epoch": 1.89,
+ "learning_rate": "1.7822e-04",
+ "loss": 0.8721,
+ "slid_loss": 0.8325,
+ "step": 1969,
+ "time": 10.96
+ },
+ {
+ "epoch": 1.89,
+ "learning_rate": "1.7819e-04",
+ "loss": 0.853,
+ "slid_loss": 0.8325,
+ "step": 1970,
+ "time": 13.58
+ },
+ {
+ "epoch": 1.89,
+ "learning_rate": "1.7816e-04",
+ "loss": 0.8372,
+ "slid_loss": 0.832,
+ "step": 1971,
+ "time": 12.96
+ },
+ {
+ "epoch": 1.89,
+ "learning_rate": "1.7813e-04",
+ "loss": 0.7642,
+ "slid_loss": 0.8302,
+ "step": 1972,
+ "time": 12.83
+ },
+ {
+ "epoch": 1.9,
+ "learning_rate": "1.7811e-04",
+ "loss": 0.8733,
+ "slid_loss": 0.8301,
+ "step": 1973,
+ "time": 11.5
+ },
+ {
+ "epoch": 1.9,
+ "learning_rate": "1.7808e-04",
+ "loss": 0.8365,
+ "slid_loss": 0.8308,
+ "step": 1974,
+ "time": 12.67
+ },
+ {
+ "epoch": 1.9,
+ "learning_rate": "1.7805e-04",
+ "loss": 0.7706,
+ "slid_loss": 0.8305,
+ "step": 1975,
+ "time": 12.78
+ },
+ {
+ "epoch": 1.9,
+ "learning_rate": "1.7802e-04",
+ "loss": 0.7279,
+ "slid_loss": 0.8296,
+ "step": 1976,
+ "time": 13.47
+ },
+ {
+ "epoch": 1.9,
+ "learning_rate": "1.7799e-04",
+ "loss": 0.8961,
+ "slid_loss": 0.83,
+ "step": 1977,
+ "time": 13.23
+ },
+ {
+ "epoch": 1.9,
+ "learning_rate": "1.7797e-04",
+ "loss": 0.7487,
+ "slid_loss": 0.829,
+ "step": 1978,
+ "time": 13.23
+ },
+ {
+ "epoch": 1.9,
+ "learning_rate": "1.7794e-04",
+ "loss": 0.7473,
+ "slid_loss": 0.8281,
+ "step": 1979,
+ "time": 11.32
+ },
+ {
+ "epoch": 1.9,
+ "learning_rate": "1.7791e-04",
+ "loss": 0.7728,
+ "slid_loss": 0.8274,
+ "step": 1980,
+ "time": 13.21
+ },
+ {
+ "epoch": 1.9,
+ "learning_rate": "1.7788e-04",
+ "loss": 0.869,
+ "slid_loss": 0.8266,
+ "step": 1981,
+ "time": 13.02
+ },
+ {
+ "epoch": 1.9,
+ "learning_rate": "1.7786e-04",
+ "loss": 0.8564,
+ "slid_loss": 0.8266,
+ "step": 1982,
+ "time": 10.66
+ },
+ {
+ "epoch": 1.9,
+ "learning_rate": "1.7783e-04",
+ "loss": 0.8421,
+ "slid_loss": 0.8259,
+ "step": 1983,
+ "time": 11.07
+ },
+ {
+ "epoch": 1.91,
+ "learning_rate": "1.7780e-04",
+ "loss": 0.837,
+ "slid_loss": 0.8252,
+ "step": 1984,
+ "time": 12.22
+ },
+ {
+ "epoch": 1.91,
+ "learning_rate": "1.7777e-04",
+ "loss": 0.8567,
+ "slid_loss": 0.8251,
+ "step": 1985,
+ "time": 12.95
+ },
+ {
+ "epoch": 1.91,
+ "learning_rate": "1.7774e-04",
+ "loss": 0.9824,
+ "slid_loss": 0.827,
+ "step": 1986,
+ "time": 11.88
+ },
+ {
+ "epoch": 1.91,
+ "learning_rate": "1.7772e-04",
+ "loss": 0.7986,
+ "slid_loss": 0.8263,
+ "step": 1987,
+ "time": 13.42
+ },
+ {
+ "epoch": 1.91,
+ "learning_rate": "1.7769e-04",
+ "loss": 0.8039,
+ "slid_loss": 0.826,
+ "step": 1988,
+ "time": 13.03
+ },
+ {
+ "epoch": 1.91,
+ "learning_rate": "1.7766e-04",
+ "loss": 0.7561,
+ "slid_loss": 0.8258,
+ "step": 1989,
+ "time": 13.86
+ },
+ {
+ "epoch": 1.91,
+ "learning_rate": "1.7763e-04",
+ "loss": 0.8473,
+ "slid_loss": 0.8255,
+ "step": 1990,
+ "time": 13.38
+ },
+ {
+ "epoch": 1.91,
+ "learning_rate": "1.7760e-04",
+ "loss": 0.8765,
+ "slid_loss": 0.8263,
+ "step": 1991,
+ "time": 12.99
+ },
+ {
+ "epoch": 1.91,
+ "learning_rate": "1.7758e-04",
+ "loss": 0.8406,
+ "slid_loss": 0.826,
+ "step": 1992,
+ "time": 13.49
+ },
+ {
+ "epoch": 1.91,
+ "learning_rate": "1.7755e-04",
+ "loss": 0.7393,
+ "slid_loss": 0.8254,
+ "step": 1993,
+ "time": 11.29
+ },
+ {
+ "epoch": 1.92,
+ "learning_rate": "1.7752e-04",
+ "loss": 0.8802,
+ "slid_loss": 0.8263,
+ "step": 1994,
+ "time": 12.11
+ },
+ {
+ "epoch": 1.92,
+ "learning_rate": "1.7749e-04",
+ "loss": 0.7405,
+ "slid_loss": 0.8254,
+ "step": 1995,
+ "time": 13.03
+ },
+ {
+ "epoch": 1.92,
+ "learning_rate": "1.7746e-04",
+ "loss": 0.8566,
+ "slid_loss": 0.8262,
+ "step": 1996,
+ "time": 12.99
+ },
+ {
+ "epoch": 1.92,
+ "learning_rate": "1.7744e-04",
+ "loss": 0.7462,
+ "slid_loss": 0.8259,
+ "step": 1997,
+ "time": 13.98
+ },
+ {
+ "epoch": 1.92,
+ "learning_rate": "1.7741e-04",
+ "loss": 0.9288,
+ "slid_loss": 0.8274,
+ "step": 1998,
+ "time": 13.66
+ },
+ {
+ "epoch": 1.92,
+ "learning_rate": "1.7738e-04",
+ "loss": 0.8569,
+ "slid_loss": 0.8275,
+ "step": 1999,
+ "time": 12.8
+ },
+ {
+ "epoch": 1.92,
+ "learning_rate": "1.7735e-04",
+ "loss": 0.8382,
+ "slid_loss": 0.8266,
+ "step": 2000,
+ "time": 14.08
+ },
+ {
+ "epoch": 1.92,
+ "learning_rate": "1.7732e-04",
+ "loss": 0.7676,
+ "slid_loss": 0.8262,
+ "step": 2001,
+ "time": 13.24
+ },
+ {
+ "epoch": 1.92,
+ "learning_rate": "1.7730e-04",
+ "loss": 0.7996,
+ "slid_loss": 0.8255,
+ "step": 2002,
+ "time": 13.6
+ },
+ {
+ "epoch": 1.92,
+ "learning_rate": "1.7727e-04",
+ "loss": 0.8044,
+ "slid_loss": 0.8248,
+ "step": 2003,
+ "time": 13.64
+ },
+ {
+ "epoch": 1.93,
+ "learning_rate": "1.7724e-04",
+ "loss": 0.7631,
+ "slid_loss": 0.8241,
+ "step": 2004,
+ "time": 13.66
+ },
+ {
+ "epoch": 1.93,
+ "learning_rate": "1.7721e-04",
+ "loss": 0.7932,
+ "slid_loss": 0.8241,
+ "step": 2005,
+ "time": 14.31
+ },
+ {
+ "epoch": 1.93,
+ "learning_rate": "1.7718e-04",
+ "loss": 0.711,
+ "slid_loss": 0.8225,
+ "step": 2006,
+ "time": 13.36
+ },
+ {
+ "epoch": 1.93,
+ "learning_rate": "1.7716e-04",
+ "loss": 0.8624,
+ "slid_loss": 0.8225,
+ "step": 2007,
+ "time": 12.94
+ },
+ {
+ "epoch": 1.93,
+ "learning_rate": "1.7713e-04",
+ "loss": 0.8848,
+ "slid_loss": 0.823,
+ "step": 2008,
+ "time": 11.86
+ },
+ {
+ "epoch": 1.93,
+ "learning_rate": "1.7710e-04",
+ "loss": 0.8698,
+ "slid_loss": 0.8236,
+ "step": 2009,
+ "time": 14.06
+ },
+ {
+ "epoch": 1.93,
+ "learning_rate": "1.7707e-04",
+ "loss": 0.8482,
+ "slid_loss": 0.8241,
+ "step": 2010,
+ "time": 11.88
+ },
+ {
+ "epoch": 1.93,
+ "learning_rate": "1.7704e-04",
+ "loss": 0.903,
+ "slid_loss": 0.8247,
+ "step": 2011,
+ "time": 14.61
+ },
+ {
+ "epoch": 1.93,
+ "learning_rate": "1.7701e-04",
+ "loss": 0.7087,
+ "slid_loss": 0.8238,
+ "step": 2012,
+ "time": 13.84
+ },
+ {
+ "epoch": 1.93,
+ "learning_rate": "1.7699e-04",
+ "loss": 0.8457,
+ "slid_loss": 0.824,
+ "step": 2013,
+ "time": 13.25
+ },
+ {
+ "epoch": 1.93,
+ "learning_rate": "1.7696e-04",
+ "loss": 0.8809,
+ "slid_loss": 0.8249,
+ "step": 2014,
+ "time": 12.86
+ },
+ {
+ "epoch": 1.94,
+ "learning_rate": "1.7693e-04",
+ "loss": 0.8237,
+ "slid_loss": 0.8254,
+ "step": 2015,
+ "time": 11.07
+ },
+ {
+ "epoch": 1.94,
+ "learning_rate": "1.7690e-04",
+ "loss": 0.7807,
+ "slid_loss": 0.8241,
+ "step": 2016,
+ "time": 11.92
+ },
+ {
+ "epoch": 1.94,
+ "learning_rate": "1.7687e-04",
+ "loss": 0.7545,
+ "slid_loss": 0.8231,
+ "step": 2017,
+ "time": 13.62
+ },
+ {
+ "epoch": 1.94,
+ "learning_rate": "1.7684e-04",
+ "loss": 0.8768,
+ "slid_loss": 0.823,
+ "step": 2018,
+ "time": 14.39
+ },
+ {
+ "epoch": 1.94,
+ "learning_rate": "1.7682e-04",
+ "loss": 0.7515,
+ "slid_loss": 0.8223,
+ "step": 2019,
+ "time": 13.4
+ },
+ {
+ "epoch": 1.94,
+ "learning_rate": "1.7679e-04",
+ "loss": 0.7827,
+ "slid_loss": 0.8216,
+ "step": 2020,
+ "time": 13.46
+ },
+ {
+ "epoch": 1.94,
+ "learning_rate": "1.7676e-04",
+ "loss": 0.7893,
+ "slid_loss": 0.8207,
+ "step": 2021,
+ "time": 13.35
+ },
+ {
+ "epoch": 1.94,
+ "learning_rate": "1.7673e-04",
+ "loss": 0.8345,
+ "slid_loss": 0.8208,
+ "step": 2022,
+ "time": 11.4
+ },
+ {
+ "epoch": 1.94,
+ "learning_rate": "1.7670e-04",
+ "loss": 0.7537,
+ "slid_loss": 0.8192,
+ "step": 2023,
+ "time": 11.66
+ },
+ {
+ "epoch": 1.94,
+ "learning_rate": "1.7667e-04",
+ "loss": 0.8879,
+ "slid_loss": 0.8206,
+ "step": 2024,
+ "time": 13.75
+ },
+ {
+ "epoch": 1.95,
+ "learning_rate": "1.7665e-04",
+ "loss": 0.7623,
+ "slid_loss": 0.8208,
+ "step": 2025,
+ "time": 13.84
+ },
+ {
+ "epoch": 1.95,
+ "learning_rate": "1.7662e-04",
+ "loss": 0.8423,
+ "slid_loss": 0.8211,
+ "step": 2026,
+ "time": 12.14
+ },
+ {
+ "epoch": 1.95,
+ "learning_rate": "1.7659e-04",
+ "loss": 0.789,
+ "slid_loss": 0.8197,
+ "step": 2027,
+ "time": 13.55
+ },
+ {
+ "epoch": 1.95,
+ "learning_rate": "1.7656e-04",
+ "loss": 0.7936,
+ "slid_loss": 0.819,
+ "step": 2028,
+ "time": 13.21
+ },
+ {
+ "epoch": 1.95,
+ "learning_rate": "1.7653e-04",
+ "loss": 0.8515,
+ "slid_loss": 0.8193,
+ "step": 2029,
+ "time": 11.79
+ },
+ {
+ "epoch": 1.95,
+ "learning_rate": "1.7650e-04",
+ "loss": 0.7697,
+ "slid_loss": 0.8184,
+ "step": 2030,
+ "time": 13.22
+ },
+ {
+ "epoch": 1.95,
+ "learning_rate": "1.7648e-04",
+ "loss": 0.8146,
+ "slid_loss": 0.8177,
+ "step": 2031,
+ "time": 13.21
+ },
+ {
+ "epoch": 1.95,
+ "learning_rate": "1.7645e-04",
+ "loss": 0.839,
+ "slid_loss": 0.8179,
+ "step": 2032,
+ "time": 11.87
+ },
+ {
+ "epoch": 1.95,
+ "learning_rate": "1.7642e-04",
+ "loss": 0.8288,
+ "slid_loss": 0.8179,
+ "step": 2033,
+ "time": 13.41
+ },
+ {
+ "epoch": 1.95,
+ "learning_rate": "1.7639e-04",
+ "loss": 0.7764,
+ "slid_loss": 0.8176,
+ "step": 2034,
+ "time": 13.38
+ },
+ {
+ "epoch": 1.95,
+ "learning_rate": "1.7636e-04",
+ "loss": 0.7965,
+ "slid_loss": 0.8167,
+ "step": 2035,
+ "time": 11.92
+ },
+ {
+ "epoch": 1.96,
+ "learning_rate": "1.7633e-04",
+ "loss": 0.8415,
+ "slid_loss": 0.817,
+ "step": 2036,
+ "time": 12.21
+ },
+ {
+ "epoch": 1.96,
+ "learning_rate": "1.7630e-04",
+ "loss": 0.8479,
+ "slid_loss": 0.8164,
+ "step": 2037,
+ "time": 12.92
+ },
+ {
+ "epoch": 1.96,
+ "learning_rate": "1.7628e-04",
+ "loss": 0.9035,
+ "slid_loss": 0.8168,
+ "step": 2038,
+ "time": 10.7
+ },
+ {
+ "epoch": 1.96,
+ "learning_rate": "1.7625e-04",
+ "loss": 0.8547,
+ "slid_loss": 0.8169,
+ "step": 2039,
+ "time": 13.67
+ },
+ {
+ "epoch": 1.96,
+ "learning_rate": "1.7622e-04",
+ "loss": 0.8842,
+ "slid_loss": 0.8185,
+ "step": 2040,
+ "time": 13.0
+ },
+ {
+ "epoch": 1.96,
+ "learning_rate": "1.7619e-04",
+ "loss": 0.7438,
+ "slid_loss": 0.8174,
+ "step": 2041,
+ "time": 13.47
+ },
+ {
+ "epoch": 1.96,
+ "learning_rate": "1.7616e-04",
+ "loss": 0.7569,
+ "slid_loss": 0.8168,
+ "step": 2042,
+ "time": 11.99
+ },
+ {
+ "epoch": 1.96,
+ "learning_rate": "1.7613e-04",
+ "loss": 0.7766,
+ "slid_loss": 0.8168,
+ "step": 2043,
+ "time": 12.97
+ },
+ {
+ "epoch": 1.96,
+ "learning_rate": "1.7610e-04",
+ "loss": 0.8183,
+ "slid_loss": 0.8166,
+ "step": 2044,
+ "time": 12.8
+ },
+ {
+ "epoch": 1.96,
+ "learning_rate": "1.7608e-04",
+ "loss": 0.7463,
+ "slid_loss": 0.8161,
+ "step": 2045,
+ "time": 13.01
+ },
+ {
+ "epoch": 1.97,
+ "learning_rate": "1.7605e-04",
+ "loss": 0.7882,
+ "slid_loss": 0.8157,
+ "step": 2046,
+ "time": 13.68
+ },
+ {
+ "epoch": 1.97,
+ "learning_rate": "1.7602e-04",
+ "loss": 0.8785,
+ "slid_loss": 0.8158,
+ "step": 2047,
+ "time": 13.41
+ },
+ {
+ "epoch": 1.97,
+ "learning_rate": "1.7599e-04",
+ "loss": 0.8151,
+ "slid_loss": 0.8156,
+ "step": 2048,
+ "time": 14.29
+ },
+ {
+ "epoch": 1.97,
+ "learning_rate": "1.7596e-04",
+ "loss": 0.8491,
+ "slid_loss": 0.8161,
+ "step": 2049,
+ "time": 12.9
+ },
+ {
+ "epoch": 1.97,
+ "learning_rate": "1.7593e-04",
+ "loss": 0.8557,
+ "slid_loss": 0.8157,
+ "step": 2050,
+ "time": 12.99
+ },
+ {
+ "epoch": 1.97,
+ "learning_rate": "1.7590e-04",
+ "loss": 0.7562,
+ "slid_loss": 0.8145,
+ "step": 2051,
+ "time": 11.93
+ },
+ {
+ "epoch": 1.97,
+ "learning_rate": "1.7588e-04",
+ "loss": 0.9049,
+ "slid_loss": 0.8155,
+ "step": 2052,
+ "time": 13.53
+ },
+ {
+ "epoch": 1.97,
+ "learning_rate": "1.7585e-04",
+ "loss": 0.8791,
+ "slid_loss": 0.8161,
+ "step": 2053,
+ "time": 13.92
+ },
+ {
+ "epoch": 1.97,
+ "learning_rate": "1.7582e-04",
+ "loss": 0.8745,
+ "slid_loss": 0.8166,
+ "step": 2054,
+ "time": 12.88
+ },
+ {
+ "epoch": 1.97,
+ "learning_rate": "1.7579e-04",
+ "loss": 0.7358,
+ "slid_loss": 0.8161,
+ "step": 2055,
+ "time": 13.16
+ },
+ {
+ "epoch": 1.98,
+ "learning_rate": "1.7576e-04",
+ "loss": 0.8981,
+ "slid_loss": 0.8178,
+ "step": 2056,
+ "time": 13.04
+ },
+ {
+ "epoch": 1.98,
+ "learning_rate": "1.7573e-04",
+ "loss": 0.8748,
+ "slid_loss": 0.8191,
+ "step": 2057,
+ "time": 12.8
+ },
+ {
+ "epoch": 1.98,
+ "learning_rate": "1.7570e-04",
+ "loss": 0.7838,
+ "slid_loss": 0.8195,
+ "step": 2058,
+ "time": 13.58
+ },
+ {
+ "epoch": 1.98,
+ "learning_rate": "1.7567e-04",
+ "loss": 0.8423,
+ "slid_loss": 0.8203,
+ "step": 2059,
+ "time": 13.1
+ },
+ {
+ "epoch": 1.98,
+ "learning_rate": "1.7565e-04",
+ "loss": 0.8232,
+ "slid_loss": 0.8199,
+ "step": 2060,
+ "time": 12.07
+ },
+ {
+ "epoch": 1.98,
+ "learning_rate": "1.7562e-04",
+ "loss": 0.8262,
+ "slid_loss": 0.8207,
+ "step": 2061,
+ "time": 11.6
+ },
+ {
+ "epoch": 1.98,
+ "learning_rate": "1.7559e-04",
+ "loss": 0.8769,
+ "slid_loss": 0.8204,
+ "step": 2062,
+ "time": 11.8
+ },
+ {
+ "epoch": 1.98,
+ "learning_rate": "1.7556e-04",
+ "loss": 0.7845,
+ "slid_loss": 0.82,
+ "step": 2063,
+ "time": 13.83
+ },
+ {
+ "epoch": 1.98,
+ "learning_rate": "1.7553e-04",
+ "loss": 0.8354,
+ "slid_loss": 0.8206,
+ "step": 2064,
+ "time": 14.01
+ },
+ {
+ "epoch": 1.98,
+ "learning_rate": "1.7550e-04",
+ "loss": 0.8288,
+ "slid_loss": 0.8218,
+ "step": 2065,
+ "time": 13.9
+ },
+ {
+ "epoch": 1.98,
+ "learning_rate": "1.7547e-04",
+ "loss": 0.8627,
+ "slid_loss": 0.8216,
+ "step": 2066,
+ "time": 13.26
+ },
+ {
+ "epoch": 1.99,
+ "learning_rate": "1.7544e-04",
+ "loss": 0.8114,
+ "slid_loss": 0.821,
+ "step": 2067,
+ "time": 13.79
+ },
+ {
+ "epoch": 1.99,
+ "learning_rate": "1.7542e-04",
+ "loss": 0.8754,
+ "slid_loss": 0.8223,
+ "step": 2068,
+ "time": 13.98
+ },
+ {
+ "epoch": 1.99,
+ "learning_rate": "1.7539e-04",
+ "loss": 0.8515,
+ "slid_loss": 0.8221,
+ "step": 2069,
+ "time": 13.54
+ },
+ {
+ "epoch": 1.99,
+ "learning_rate": "1.7536e-04",
+ "loss": 0.7751,
+ "slid_loss": 0.8213,
+ "step": 2070,
+ "time": 12.83
+ },
+ {
+ "epoch": 1.99,
+ "learning_rate": "1.7533e-04",
+ "loss": 0.842,
+ "slid_loss": 0.8213,
+ "step": 2071,
+ "time": 13.3
+ },
+ {
+ "epoch": 1.99,
+ "learning_rate": "1.7530e-04",
+ "loss": 0.9551,
+ "slid_loss": 0.8233,
+ "step": 2072,
+ "time": 13.66
+ },
+ {
+ "epoch": 1.99,
+ "learning_rate": "1.7527e-04",
+ "loss": 0.9395,
+ "slid_loss": 0.8239,
+ "step": 2073,
+ "time": 14.04
+ },
+ {
+ "epoch": 1.99,
+ "learning_rate": "1.7524e-04",
+ "loss": 0.8937,
+ "slid_loss": 0.8245,
+ "step": 2074,
+ "time": 13.52
+ },
+ {
+ "epoch": 1.99,
+ "learning_rate": "1.7521e-04",
+ "loss": 0.8417,
+ "slid_loss": 0.8252,
+ "step": 2075,
+ "time": 11.41
+ },
+ {
+ "epoch": 1.99,
+ "learning_rate": "1.7518e-04",
+ "loss": 0.7155,
+ "slid_loss": 0.8251,
+ "step": 2076,
+ "time": 13.76
+ },
+ {
+ "epoch": 2.0,
+ "learning_rate": "1.7515e-04",
+ "loss": 0.8252,
+ "slid_loss": 0.8244,
+ "step": 2077,
+ "time": 13.33
+ },
+ {
+ "epoch": 2.0,
+ "learning_rate": "1.7513e-04",
+ "loss": 0.7343,
+ "slid_loss": 0.8242,
+ "step": 2078,
+ "time": 13.18
+ },
+ {
+ "epoch": 2.0,
+ "learning_rate": "1.7510e-04",
+ "loss": 0.91,
+ "slid_loss": 0.8259,
+ "step": 2079,
+ "time": 12.83
+ },
+ {
+ "epoch": 2.0,
+ "learning_rate": "1.7507e-04",
+ "loss": 0.7312,
+ "slid_loss": 0.8254,
+ "step": 2080,
+ "time": 11.55
+ },
+ {
+ "epoch": 2.0,
+ "learning_rate": "1.7504e-04",
+ "loss": 0.8165,
+ "slid_loss": 0.8249,
+ "step": 2081,
+ "time": 13.89
+ },
+ {
+ "epoch": 2.0,
+ "learning_rate": "1.7501e-04",
+ "loss": 0.8129,
+ "slid_loss": 0.8245,
+ "step": 2082,
+ "time": 13.39
+ },
+ {
+ "epoch": 2.0,
+ "learning_rate": "1.7498e-04",
+ "loss": 0.8596,
+ "slid_loss": 0.8247,
+ "step": 2083,
+ "time": 173.62
+ },
+ {
+ "epoch": 2.0,
+ "learning_rate": "1.7495e-04",
+ "loss": 0.8486,
+ "slid_loss": 0.8248,
+ "step": 2084,
+ "time": 13.4
+ },
+ {
+ "epoch": 2.0,
+ "learning_rate": "1.7492e-04",
+ "loss": 0.8069,
+ "slid_loss": 0.8243,
+ "step": 2085,
+ "time": 13.29
+ },
+ {
+ "epoch": 2.0,
+ "learning_rate": "1.7489e-04",
+ "loss": 0.8845,
+ "slid_loss": 0.8233,
+ "step": 2086,
+ "time": 13.22
+ },
+ {
+ "epoch": 2.0,
+ "learning_rate": "1.7486e-04",
+ "loss": 0.8534,
+ "slid_loss": 0.8238,
+ "step": 2087,
+ "time": 12.77
+ },
+ {
+ "epoch": 2.01,
+ "learning_rate": "1.7484e-04",
+ "loss": 0.8601,
+ "slid_loss": 0.8244,
+ "step": 2088,
+ "time": 13.39
+ },
+ {
+ "epoch": 2.01,
+ "learning_rate": "1.7481e-04",
+ "loss": 0.8688,
+ "slid_loss": 0.8255,
+ "step": 2089,
+ "time": 13.55
+ },
+ {
+ "epoch": 2.01,
+ "learning_rate": "1.7478e-04",
+ "loss": 0.818,
+ "slid_loss": 0.8252,
+ "step": 2090,
+ "time": 13.83
+ },
+ {
+ "epoch": 2.01,
+ "learning_rate": "1.7475e-04",
+ "loss": 0.7879,
+ "slid_loss": 0.8244,
+ "step": 2091,
+ "time": 12.17
+ },
+ {
+ "epoch": 2.01,
+ "learning_rate": "1.7472e-04",
+ "loss": 0.7116,
+ "slid_loss": 0.8231,
+ "step": 2092,
+ "time": 13.79
+ },
+ {
+ "epoch": 2.01,
+ "learning_rate": "1.7469e-04",
+ "loss": 0.815,
+ "slid_loss": 0.8238,
+ "step": 2093,
+ "time": 13.66
+ },
+ {
+ "epoch": 2.01,
+ "learning_rate": "1.7466e-04",
+ "loss": 0.8384,
+ "slid_loss": 0.8234,
+ "step": 2094,
+ "time": 13.84
+ },
+ {
+ "epoch": 2.01,
+ "learning_rate": "1.7463e-04",
+ "loss": 0.7947,
+ "slid_loss": 0.8239,
+ "step": 2095,
+ "time": 13.54
+ },
+ {
+ "epoch": 2.01,
+ "learning_rate": "1.7460e-04",
+ "loss": 0.8604,
+ "slid_loss": 0.824,
+ "step": 2096,
+ "time": 13.56
+ },
+ {
+ "epoch": 2.01,
+ "learning_rate": "1.7457e-04",
+ "loss": 0.8914,
+ "slid_loss": 0.8254,
+ "step": 2097,
+ "time": 13.65
+ },
+ {
+ "epoch": 2.02,
+ "learning_rate": "1.7454e-04",
+ "loss": 0.8418,
+ "slid_loss": 0.8246,
+ "step": 2098,
+ "time": 12.98
+ },
+ {
+ "epoch": 2.02,
+ "learning_rate": "1.7451e-04",
+ "loss": 0.7767,
+ "slid_loss": 0.8238,
+ "step": 2099,
+ "time": 13.37
+ },
+ {
+ "epoch": 2.02,
+ "learning_rate": "1.7449e-04",
+ "loss": 0.6768,
+ "slid_loss": 0.8221,
+ "step": 2100,
+ "time": 13.65
+ },
+ {
+ "epoch": 2.02,
+ "learning_rate": "1.7446e-04",
+ "loss": 0.8864,
+ "slid_loss": 0.8233,
+ "step": 2101,
+ "time": 12.18
+ },
+ {
+ "epoch": 2.02,
+ "learning_rate": "1.7443e-04",
+ "loss": 0.7445,
+ "slid_loss": 0.8228,
+ "step": 2102,
+ "time": 14.28
+ },
+ {
+ "epoch": 2.02,
+ "learning_rate": "1.7440e-04",
+ "loss": 0.7836,
+ "slid_loss": 0.8226,
+ "step": 2103,
+ "time": 13.22
+ },
+ {
+ "epoch": 2.02,
+ "learning_rate": "1.7437e-04",
+ "loss": 0.8544,
+ "slid_loss": 0.8235,
+ "step": 2104,
+ "time": 11.44
+ },
+ {
+ "epoch": 2.02,
+ "learning_rate": "1.7434e-04",
+ "loss": 0.9254,
+ "slid_loss": 0.8248,
+ "step": 2105,
+ "time": 13.91
+ },
+ {
+ "epoch": 2.02,
+ "learning_rate": "1.7431e-04",
+ "loss": 0.794,
+ "slid_loss": 0.8256,
+ "step": 2106,
+ "time": 13.23
+ },
+ {
+ "epoch": 2.02,
+ "learning_rate": "1.7428e-04",
+ "loss": 0.7953,
+ "slid_loss": 0.825,
+ "step": 2107,
+ "time": 11.92
+ },
+ {
+ "epoch": 2.02,
+ "learning_rate": "1.7425e-04",
+ "loss": 0.8632,
+ "slid_loss": 0.8248,
+ "step": 2108,
+ "time": 13.97
+ },
+ {
+ "epoch": 2.03,
+ "learning_rate": "1.7422e-04",
+ "loss": 0.8479,
+ "slid_loss": 0.8245,
+ "step": 2109,
+ "time": 13.02
+ },
+ {
+ "epoch": 2.03,
+ "learning_rate": "1.7419e-04",
+ "loss": 0.8253,
+ "slid_loss": 0.8243,
+ "step": 2110,
+ "time": 13.35
+ },
+ {
+ "epoch": 2.03,
+ "learning_rate": "1.7416e-04",
+ "loss": 0.7581,
+ "slid_loss": 0.8229,
+ "step": 2111,
+ "time": 11.5
+ },
+ {
+ "epoch": 2.03,
+ "learning_rate": "1.7413e-04",
+ "loss": 0.8236,
+ "slid_loss": 0.824,
+ "step": 2112,
+ "time": 12.86
+ },
+ {
+ "epoch": 2.03,
+ "learning_rate": "1.7410e-04",
+ "loss": 0.7366,
+ "slid_loss": 0.8229,
+ "step": 2113,
+ "time": 12.91
+ },
+ {
+ "epoch": 2.03,
+ "learning_rate": "1.7407e-04",
+ "loss": 0.8839,
+ "slid_loss": 0.8229,
+ "step": 2114,
+ "time": 13.29
+ },
+ {
+ "epoch": 2.03,
+ "learning_rate": "1.7405e-04",
+ "loss": 0.8301,
+ "slid_loss": 0.823,
+ "step": 2115,
+ "time": 11.45
+ },
+ {
+ "epoch": 2.03,
+ "learning_rate": "1.7402e-04",
+ "loss": 0.8091,
+ "slid_loss": 0.8233,
+ "step": 2116,
+ "time": 13.31
+ },
+ {
+ "epoch": 2.03,
+ "learning_rate": "1.7399e-04",
+ "loss": 0.8629,
+ "slid_loss": 0.8244,
+ "step": 2117,
+ "time": 12.72
+ },
+ {
+ "epoch": 2.03,
+ "learning_rate": "1.7396e-04",
+ "loss": 0.7446,
+ "slid_loss": 0.8231,
+ "step": 2118,
+ "time": 14.24
+ },
+ {
+ "epoch": 2.04,
+ "learning_rate": "1.7393e-04",
+ "loss": 0.7852,
+ "slid_loss": 0.8234,
+ "step": 2119,
+ "time": 11.66
+ },
+ {
+ "epoch": 2.04,
+ "learning_rate": "1.7390e-04",
+ "loss": 0.7484,
+ "slid_loss": 0.823,
+ "step": 2120,
+ "time": 13.13
+ },
+ {
+ "epoch": 2.04,
+ "learning_rate": "1.7387e-04",
+ "loss": 0.7833,
+ "slid_loss": 0.823,
+ "step": 2121,
+ "time": 13.36
+ },
+ {
+ "epoch": 2.04,
+ "learning_rate": "1.7384e-04",
+ "loss": 0.8712,
+ "slid_loss": 0.8234,
+ "step": 2122,
+ "time": 13.77
+ },
+ {
+ "epoch": 2.04,
+ "learning_rate": "1.7381e-04",
+ "loss": 0.8234,
+ "slid_loss": 0.8241,
+ "step": 2123,
+ "time": 13.09
+ },
+ {
+ "epoch": 2.04,
+ "learning_rate": "1.7378e-04",
+ "loss": 0.9055,
+ "slid_loss": 0.8242,
+ "step": 2124,
+ "time": 12.77
+ },
+ {
+ "epoch": 2.04,
+ "learning_rate": "1.7375e-04",
+ "loss": 0.7091,
+ "slid_loss": 0.8237,
+ "step": 2125,
+ "time": 13.55
+ },
+ {
+ "epoch": 2.04,
+ "learning_rate": "1.7372e-04",
+ "loss": 0.7065,
+ "slid_loss": 0.8223,
+ "step": 2126,
+ "time": 13.16
+ },
+ {
+ "epoch": 2.04,
+ "learning_rate": "1.7369e-04",
+ "loss": 0.8058,
+ "slid_loss": 0.8225,
+ "step": 2127,
+ "time": 12.64
+ },
+ {
+ "epoch": 2.04,
+ "learning_rate": "1.7366e-04",
+ "loss": 0.7702,
+ "slid_loss": 0.8223,
+ "step": 2128,
+ "time": 12.85
+ },
+ {
+ "epoch": 2.05,
+ "learning_rate": "1.7363e-04",
+ "loss": 0.7705,
+ "slid_loss": 0.8215,
+ "step": 2129,
+ "time": 13.27
+ },
+ {
+ "epoch": 2.05,
+ "learning_rate": "1.7360e-04",
+ "loss": 0.7022,
+ "slid_loss": 0.8208,
+ "step": 2130,
+ "time": 11.27
+ },
+ {
+ "epoch": 2.05,
+ "learning_rate": "1.7357e-04",
+ "loss": 0.8055,
+ "slid_loss": 0.8207,
+ "step": 2131,
+ "time": 13.4
+ },
+ {
+ "epoch": 2.05,
+ "learning_rate": "1.7354e-04",
+ "loss": 0.8472,
+ "slid_loss": 0.8208,
+ "step": 2132,
+ "time": 12.34
+ },
+ {
+ "epoch": 2.05,
+ "learning_rate": "1.7351e-04",
+ "loss": 0.8472,
+ "slid_loss": 0.821,
+ "step": 2133,
+ "time": 13.45
+ },
+ {
+ "epoch": 2.05,
+ "learning_rate": "1.7348e-04",
+ "loss": 0.8167,
+ "slid_loss": 0.8214,
+ "step": 2134,
+ "time": 12.87
+ },
+ {
+ "epoch": 2.05,
+ "learning_rate": "1.7346e-04",
+ "loss": 0.7314,
+ "slid_loss": 0.8207,
+ "step": 2135,
+ "time": 11.38
+ },
+ {
+ "epoch": 2.05,
+ "learning_rate": "1.7343e-04",
+ "loss": 0.8021,
+ "slid_loss": 0.8203,
+ "step": 2136,
+ "time": 13.24
+ },
+ {
+ "epoch": 2.05,
+ "learning_rate": "1.7340e-04",
+ "loss": 0.8274,
+ "slid_loss": 0.8201,
+ "step": 2137,
+ "time": 13.4
+ },
+ {
+ "epoch": 2.05,
+ "learning_rate": "1.7337e-04",
+ "loss": 0.8239,
+ "slid_loss": 0.8193,
+ "step": 2138,
+ "time": 11.37
+ },
+ {
+ "epoch": 2.05,
+ "learning_rate": "1.7334e-04",
+ "loss": 0.7907,
+ "slid_loss": 0.8187,
+ "step": 2139,
+ "time": 14.03
+ },
+ {
+ "epoch": 2.06,
+ "learning_rate": "1.7331e-04",
+ "loss": 0.8522,
+ "slid_loss": 0.8184,
+ "step": 2140,
+ "time": 15.07
+ },
+ {
+ "epoch": 2.06,
+ "learning_rate": "1.7328e-04",
+ "loss": 0.7952,
+ "slid_loss": 0.8189,
+ "step": 2141,
+ "time": 13.05
+ },
+ {
+ "epoch": 2.06,
+ "learning_rate": "1.7325e-04",
+ "loss": 0.9281,
+ "slid_loss": 0.8206,
+ "step": 2142,
+ "time": 13.15
+ },
+ {
+ "epoch": 2.06,
+ "learning_rate": "1.7322e-04",
+ "loss": 0.7154,
+ "slid_loss": 0.82,
+ "step": 2143,
+ "time": 13.34
+ },
+ {
+ "epoch": 2.06,
+ "learning_rate": "1.7319e-04",
+ "loss": 0.7952,
+ "slid_loss": 0.8197,
+ "step": 2144,
+ "time": 13.74
+ },
+ {
+ "epoch": 2.06,
+ "learning_rate": "1.7316e-04",
+ "loss": 0.7374,
+ "slid_loss": 0.8197,
+ "step": 2145,
+ "time": 13.96
+ },
+ {
+ "epoch": 2.06,
+ "learning_rate": "1.7313e-04",
+ "loss": 0.7527,
+ "slid_loss": 0.8193,
+ "step": 2146,
+ "time": 13.58
+ },
+ {
+ "epoch": 2.06,
+ "learning_rate": "1.7310e-04",
+ "loss": 0.8629,
+ "slid_loss": 0.8191,
+ "step": 2147,
+ "time": 13.6
+ },
+ {
+ "epoch": 2.06,
+ "learning_rate": "1.7307e-04",
+ "loss": 0.7596,
+ "slid_loss": 0.8186,
+ "step": 2148,
+ "time": 13.59
+ },
+ {
+ "epoch": 2.06,
+ "learning_rate": "1.7304e-04",
+ "loss": 0.8607,
+ "slid_loss": 0.8187,
+ "step": 2149,
+ "time": 14.03
+ },
+ {
+ "epoch": 2.07,
+ "learning_rate": "1.7301e-04",
+ "loss": 0.872,
+ "slid_loss": 0.8189,
+ "step": 2150,
+ "time": 12.81
+ },
+ {
+ "epoch": 2.07,
+ "learning_rate": "1.7298e-04",
+ "loss": 0.812,
+ "slid_loss": 0.8194,
+ "step": 2151,
+ "time": 13.13
+ },
+ {
+ "epoch": 2.07,
+ "learning_rate": "1.7295e-04",
+ "loss": 0.7477,
+ "slid_loss": 0.8179,
+ "step": 2152,
+ "time": 11.45
+ },
+ {
+ "epoch": 2.07,
+ "learning_rate": "1.7292e-04",
+ "loss": 0.8091,
+ "slid_loss": 0.8172,
+ "step": 2153,
+ "time": 13.62
+ },
+ {
+ "epoch": 2.07,
+ "learning_rate": "1.7289e-04",
+ "loss": 0.7644,
+ "slid_loss": 0.8161,
+ "step": 2154,
+ "time": 12.96
+ },
+ {
+ "epoch": 2.07,
+ "learning_rate": "1.7286e-04",
+ "loss": 0.7416,
+ "slid_loss": 0.8161,
+ "step": 2155,
+ "time": 13.67
+ },
+ {
+ "epoch": 2.07,
+ "learning_rate": "1.7283e-04",
+ "loss": 0.8153,
+ "slid_loss": 0.8153,
+ "step": 2156,
+ "time": 13.22
+ },
+ {
+ "epoch": 2.07,
+ "learning_rate": "1.7280e-04",
+ "loss": 0.7929,
+ "slid_loss": 0.8145,
+ "step": 2157,
+ "time": 13.6
+ },
+ {
+ "epoch": 2.07,
+ "learning_rate": "1.7277e-04",
+ "loss": 0.8095,
+ "slid_loss": 0.8147,
+ "step": 2158,
+ "time": 11.36
+ },
+ {
+ "epoch": 2.07,
+ "learning_rate": "1.7274e-04",
+ "loss": 0.7934,
+ "slid_loss": 0.8142,
+ "step": 2159,
+ "time": 13.69
+ },
+ {
+ "epoch": 2.07,
+ "learning_rate": "1.7271e-04",
+ "loss": 0.7191,
+ "slid_loss": 0.8132,
+ "step": 2160,
+ "time": 12.77
+ },
+ {
+ "epoch": 2.08,
+ "learning_rate": "1.7268e-04",
+ "loss": 0.7659,
+ "slid_loss": 0.8126,
+ "step": 2161,
+ "time": 13.67
+ },
+ {
+ "epoch": 2.08,
+ "learning_rate": "1.7265e-04",
+ "loss": 0.7851,
+ "slid_loss": 0.8117,
+ "step": 2162,
+ "time": 13.77
+ },
+ {
+ "epoch": 2.08,
+ "learning_rate": "1.7262e-04",
+ "loss": 0.7829,
+ "slid_loss": 0.8117,
+ "step": 2163,
+ "time": 13.84
+ },
+ {
+ "epoch": 2.08,
+ "learning_rate": "1.7259e-04",
+ "loss": 0.7987,
+ "slid_loss": 0.8113,
+ "step": 2164,
+ "time": 12.88
+ },
+ {
+ "epoch": 2.08,
+ "learning_rate": "1.7256e-04",
+ "loss": 0.7274,
+ "slid_loss": 0.8103,
+ "step": 2165,
+ "time": 13.29
+ },
+ {
+ "epoch": 2.08,
+ "learning_rate": "1.7253e-04",
+ "loss": 0.8484,
+ "slid_loss": 0.8101,
+ "step": 2166,
+ "time": 14.36
+ },
+ {
+ "epoch": 2.08,
+ "learning_rate": "1.7250e-04",
+ "loss": 0.8566,
+ "slid_loss": 0.8106,
+ "step": 2167,
+ "time": 13.83
+ },
+ {
+ "epoch": 2.08,
+ "learning_rate": "1.7247e-04",
+ "loss": 0.7429,
+ "slid_loss": 0.8093,
+ "step": 2168,
+ "time": 14.29
+ },
+ {
+ "epoch": 2.08,
+ "learning_rate": "1.7244e-04",
+ "loss": 0.8994,
+ "slid_loss": 0.8097,
+ "step": 2169,
+ "time": 13.54
+ },
+ {
+ "epoch": 2.08,
+ "learning_rate": "1.7241e-04",
+ "loss": 0.8917,
+ "slid_loss": 0.8109,
+ "step": 2170,
+ "time": 13.52
+ },
+ {
+ "epoch": 2.09,
+ "learning_rate": "1.7238e-04",
+ "loss": 0.6748,
+ "slid_loss": 0.8092,
+ "step": 2171,
+ "time": 12.11
+ },
+ {
+ "epoch": 2.09,
+ "learning_rate": "1.7235e-04",
+ "loss": 0.8227,
+ "slid_loss": 0.8079,
+ "step": 2172,
+ "time": 13.4
+ },
+ {
+ "epoch": 2.09,
+ "learning_rate": "1.7232e-04",
+ "loss": 0.921,
+ "slid_loss": 0.8077,
+ "step": 2173,
+ "time": 13.69
+ },
+ {
+ "epoch": 2.09,
+ "learning_rate": "1.7229e-04",
+ "loss": 0.8645,
+ "slid_loss": 0.8074,
+ "step": 2174,
+ "time": 14.1
+ },
+ {
+ "epoch": 2.09,
+ "learning_rate": "1.7226e-04",
+ "loss": 0.8723,
+ "slid_loss": 0.8077,
+ "step": 2175,
+ "time": 13.22
+ },
+ {
+ "epoch": 2.09,
+ "learning_rate": "1.7223e-04",
+ "loss": 0.6827,
+ "slid_loss": 0.8074,
+ "step": 2176,
+ "time": 12.96
+ },
+ {
+ "epoch": 2.09,
+ "learning_rate": "1.7220e-04",
+ "loss": 0.8471,
+ "slid_loss": 0.8076,
+ "step": 2177,
+ "time": 11.44
+ },
+ {
+ "epoch": 2.09,
+ "learning_rate": "1.7217e-04",
+ "loss": 0.7979,
+ "slid_loss": 0.8083,
+ "step": 2178,
+ "time": 12.96
+ },
+ {
+ "epoch": 2.09,
+ "learning_rate": "1.7214e-04",
+ "loss": 0.7906,
+ "slid_loss": 0.8071,
+ "step": 2179,
+ "time": 13.99
+ },
+ {
+ "epoch": 2.09,
+ "learning_rate": "1.7211e-04",
+ "loss": 0.8215,
+ "slid_loss": 0.808,
+ "step": 2180,
+ "time": 12.84
+ },
+ {
+ "epoch": 2.1,
+ "learning_rate": "1.7208e-04",
+ "loss": 0.7861,
+ "slid_loss": 0.8077,
+ "step": 2181,
+ "time": 13.3
+ },
+ {
+ "epoch": 2.1,
+ "learning_rate": "1.7205e-04",
+ "loss": 0.8479,
+ "slid_loss": 0.808,
+ "step": 2182,
+ "time": 13.1
+ },
+ {
+ "epoch": 2.1,
+ "learning_rate": "1.7202e-04",
+ "loss": 0.8295,
+ "slid_loss": 0.8077,
+ "step": 2183,
+ "time": 13.54
+ },
+ {
+ "epoch": 2.1,
+ "learning_rate": "1.7199e-04",
+ "loss": 0.8292,
+ "slid_loss": 0.8075,
+ "step": 2184,
+ "time": 12.72
+ },
+ {
+ "epoch": 2.1,
+ "learning_rate": "1.7196e-04",
+ "loss": 0.8243,
+ "slid_loss": 0.8077,
+ "step": 2185,
+ "time": 14.12
+ },
+ {
+ "epoch": 2.1,
+ "learning_rate": "1.7193e-04",
+ "loss": 0.8517,
+ "slid_loss": 0.8074,
+ "step": 2186,
+ "time": 13.01
+ },
+ {
+ "epoch": 2.1,
+ "learning_rate": "1.7190e-04",
+ "loss": 0.9026,
+ "slid_loss": 0.8079,
+ "step": 2187,
+ "time": 13.95
+ },
+ {
+ "epoch": 2.1,
+ "learning_rate": "1.7187e-04",
+ "loss": 0.701,
+ "slid_loss": 0.8063,
+ "step": 2188,
+ "time": 12.9
+ },
+ {
+ "epoch": 2.1,
+ "learning_rate": "1.7184e-04",
+ "loss": 0.7578,
+ "slid_loss": 0.8052,
+ "step": 2189,
+ "time": 13.78
+ },
+ {
+ "epoch": 2.1,
+ "learning_rate": "1.7181e-04",
+ "loss": 0.8242,
+ "slid_loss": 0.8052,
+ "step": 2190,
+ "time": 13.31
+ },
+ {
+ "epoch": 2.1,
+ "learning_rate": "1.7178e-04",
+ "loss": 0.8126,
+ "slid_loss": 0.8055,
+ "step": 2191,
+ "time": 12.31
+ },
+ {
+ "epoch": 2.11,
+ "learning_rate": "1.7175e-04",
+ "loss": 0.8094,
+ "slid_loss": 0.8064,
+ "step": 2192,
+ "time": 13.22
+ },
+ {
+ "epoch": 2.11,
+ "learning_rate": "1.7172e-04",
+ "loss": 0.768,
+ "slid_loss": 0.806,
+ "step": 2193,
+ "time": 13.59
+ },
+ {
+ "epoch": 2.11,
+ "learning_rate": "1.7169e-04",
+ "loss": 0.872,
+ "slid_loss": 0.8063,
+ "step": 2194,
+ "time": 13.26
+ },
+ {
+ "epoch": 2.11,
+ "learning_rate": "1.7166e-04",
+ "loss": 0.8102,
+ "slid_loss": 0.8065,
+ "step": 2195,
+ "time": 12.21
+ },
+ {
+ "epoch": 2.11,
+ "learning_rate": "1.7163e-04",
+ "loss": 0.7281,
+ "slid_loss": 0.8051,
+ "step": 2196,
+ "time": 13.83
+ },
+ {
+ "epoch": 2.11,
+ "learning_rate": "1.7160e-04",
+ "loss": 0.8073,
+ "slid_loss": 0.8043,
+ "step": 2197,
+ "time": 13.23
+ },
+ {
+ "epoch": 2.11,
+ "learning_rate": "1.7157e-04",
+ "loss": 0.855,
+ "slid_loss": 0.8044,
+ "step": 2198,
+ "time": 13.45
+ },
+ {
+ "epoch": 2.11,
+ "learning_rate": "1.7154e-04",
+ "loss": 0.872,
+ "slid_loss": 0.8054,
+ "step": 2199,
+ "time": 13.25
+ },
+ {
+ "epoch": 2.11,
+ "learning_rate": "1.7151e-04",
+ "loss": 0.6447,
+ "slid_loss": 0.8051,
+ "step": 2200,
+ "time": 11.85
+ },
+ {
+ "epoch": 2.11,
+ "learning_rate": "1.7148e-04",
+ "loss": 0.8157,
+ "slid_loss": 0.8044,
+ "step": 2201,
+ "time": 13.88
+ },
+ {
+ "epoch": 2.12,
+ "learning_rate": "1.7145e-04",
+ "loss": 0.8816,
+ "slid_loss": 0.8057,
+ "step": 2202,
+ "time": 14.14
+ },
+ {
+ "epoch": 2.12,
+ "learning_rate": "1.7142e-04",
+ "loss": 0.7806,
+ "slid_loss": 0.8057,
+ "step": 2203,
+ "time": 13.88
+ },
+ {
+ "epoch": 2.12,
+ "learning_rate": "1.7139e-04",
+ "loss": 0.8343,
+ "slid_loss": 0.8055,
+ "step": 2204,
+ "time": 14.29
+ },
+ {
+ "epoch": 2.12,
+ "learning_rate": "1.7136e-04",
+ "loss": 0.6909,
+ "slid_loss": 0.8031,
+ "step": 2205,
+ "time": 13.05
+ },
+ {
+ "epoch": 2.12,
+ "learning_rate": "1.7133e-04",
+ "loss": 0.8889,
+ "slid_loss": 0.8041,
+ "step": 2206,
+ "time": 13.97
+ },
+ {
+ "epoch": 2.12,
+ "learning_rate": "1.7130e-04",
+ "loss": 0.7157,
+ "slid_loss": 0.8033,
+ "step": 2207,
+ "time": 12.82
+ },
+ {
+ "epoch": 2.12,
+ "learning_rate": "1.7127e-04",
+ "loss": 0.7919,
+ "slid_loss": 0.8026,
+ "step": 2208,
+ "time": 11.83
+ },
+ {
+ "epoch": 2.12,
+ "learning_rate": "1.7124e-04",
+ "loss": 0.7788,
+ "slid_loss": 0.8019,
+ "step": 2209,
+ "time": 12.03
+ },
+ {
+ "epoch": 2.12,
+ "learning_rate": "1.7121e-04",
+ "loss": 0.8312,
+ "slid_loss": 0.802,
+ "step": 2210,
+ "time": 13.79
+ },
+ {
+ "epoch": 2.12,
+ "learning_rate": "1.7117e-04",
+ "loss": 0.8003,
+ "slid_loss": 0.8024,
+ "step": 2211,
+ "time": 11.86
+ },
+ {
+ "epoch": 2.12,
+ "learning_rate": "1.7114e-04",
+ "loss": 0.7627,
+ "slid_loss": 0.8018,
+ "step": 2212,
+ "time": 13.76
+ },
+ {
+ "epoch": 2.13,
+ "learning_rate": "1.7111e-04",
+ "loss": 0.793,
+ "slid_loss": 0.8023,
+ "step": 2213,
+ "time": 14.52
+ },
+ {
+ "epoch": 2.13,
+ "learning_rate": "1.7108e-04",
+ "loss": 0.7943,
+ "slid_loss": 0.8014,
+ "step": 2214,
+ "time": 13.0
+ },
+ {
+ "epoch": 2.13,
+ "learning_rate": "1.7105e-04",
+ "loss": 0.7813,
+ "slid_loss": 0.801,
+ "step": 2215,
+ "time": 13.3
+ },
+ {
+ "epoch": 2.13,
+ "learning_rate": "1.7102e-04",
+ "loss": 0.7085,
+ "slid_loss": 0.7999,
+ "step": 2216,
+ "time": 13.94
+ },
+ {
+ "epoch": 2.13,
+ "learning_rate": "1.7099e-04",
+ "loss": 0.8356,
+ "slid_loss": 0.7997,
+ "step": 2217,
+ "time": 13.12
+ },
+ {
+ "epoch": 2.13,
+ "learning_rate": "1.7096e-04",
+ "loss": 0.809,
+ "slid_loss": 0.8003,
+ "step": 2218,
+ "time": 13.59
+ },
+ {
+ "epoch": 2.13,
+ "learning_rate": "1.7093e-04",
+ "loss": 0.7378,
+ "slid_loss": 0.7998,
+ "step": 2219,
+ "time": 13.55
+ },
+ {
+ "epoch": 2.13,
+ "learning_rate": "1.7090e-04",
+ "loss": 0.7442,
+ "slid_loss": 0.7998,
+ "step": 2220,
+ "time": 13.57
+ },
+ {
+ "epoch": 2.13,
+ "learning_rate": "1.7087e-04",
+ "loss": 0.7218,
+ "slid_loss": 0.7992,
+ "step": 2221,
+ "time": 12.79
+ },
+ {
+ "epoch": 2.13,
+ "learning_rate": "1.7084e-04",
+ "loss": 0.8291,
+ "slid_loss": 0.7988,
+ "step": 2222,
+ "time": 12.76
+ },
+ {
+ "epoch": 2.14,
+ "learning_rate": "1.7081e-04",
+ "loss": 0.7834,
+ "slid_loss": 0.7984,
+ "step": 2223,
+ "time": 12.8
+ },
+ {
+ "epoch": 2.14,
+ "learning_rate": "1.7078e-04",
+ "loss": 0.7405,
+ "slid_loss": 0.7967,
+ "step": 2224,
+ "time": 14.2
+ },
+ {
+ "epoch": 2.14,
+ "learning_rate": "1.7075e-04",
+ "loss": 0.7425,
+ "slid_loss": 0.797,
+ "step": 2225,
+ "time": 13.25
+ },
+ {
+ "epoch": 2.14,
+ "learning_rate": "1.7072e-04",
+ "loss": 0.7462,
+ "slid_loss": 0.7974,
+ "step": 2226,
+ "time": 13.13
+ },
+ {
+ "epoch": 2.14,
+ "learning_rate": "1.7069e-04",
+ "loss": 0.8373,
+ "slid_loss": 0.7978,
+ "step": 2227,
+ "time": 13.85
+ },
+ {
+ "epoch": 2.14,
+ "learning_rate": "1.7066e-04",
+ "loss": 0.7493,
+ "slid_loss": 0.7976,
+ "step": 2228,
+ "time": 13.57
+ },
+ {
+ "epoch": 2.14,
+ "learning_rate": "1.7063e-04",
+ "loss": 0.8001,
+ "slid_loss": 0.7978,
+ "step": 2229,
+ "time": 12.41
+ },
+ {
+ "epoch": 2.14,
+ "learning_rate": "1.7060e-04",
+ "loss": 0.7578,
+ "slid_loss": 0.7984,
+ "step": 2230,
+ "time": 13.46
+ },
+ {
+ "epoch": 2.14,
+ "learning_rate": "1.7057e-04",
+ "loss": 0.8109,
+ "slid_loss": 0.7985,
+ "step": 2231,
+ "time": 12.2
+ },
+ {
+ "epoch": 2.14,
+ "learning_rate": "1.7053e-04",
+ "loss": 0.7628,
+ "slid_loss": 0.7976,
+ "step": 2232,
+ "time": 13.15
+ },
+ {
+ "epoch": 2.15,
+ "learning_rate": "1.7050e-04",
+ "loss": 0.8082,
+ "slid_loss": 0.7972,
+ "step": 2233,
+ "time": 13.09
+ },
+ {
+ "epoch": 2.15,
+ "learning_rate": "1.7047e-04",
+ "loss": 0.8269,
+ "slid_loss": 0.7973,
+ "step": 2234,
+ "time": 13.69
+ },
+ {
+ "epoch": 2.15,
+ "learning_rate": "1.7044e-04",
+ "loss": 0.7934,
+ "slid_loss": 0.7979,
+ "step": 2235,
+ "time": 13.86
+ },
+ {
+ "epoch": 2.15,
+ "learning_rate": "1.7041e-04",
+ "loss": 0.7865,
+ "slid_loss": 0.7978,
+ "step": 2236,
+ "time": 13.57
+ },
+ {
+ "epoch": 2.15,
+ "learning_rate": "1.7038e-04",
+ "loss": 0.7845,
+ "slid_loss": 0.7974,
+ "step": 2237,
+ "time": 14.49
+ },
+ {
+ "epoch": 2.15,
+ "learning_rate": "1.7035e-04",
+ "loss": 0.7661,
+ "slid_loss": 0.7968,
+ "step": 2238,
+ "time": 13.6
+ },
+ {
+ "epoch": 2.15,
+ "learning_rate": "1.7032e-04",
+ "loss": 0.7414,
+ "slid_loss": 0.7963,
+ "step": 2239,
+ "time": 12.2
+ },
+ {
+ "epoch": 2.15,
+ "learning_rate": "1.7029e-04",
+ "loss": 0.8403,
+ "slid_loss": 0.7962,
+ "step": 2240,
+ "time": 14.26
+ },
+ {
+ "epoch": 2.15,
+ "learning_rate": "1.7026e-04",
+ "loss": 0.7392,
+ "slid_loss": 0.7956,
+ "step": 2241,
+ "time": 13.82
+ },
+ {
+ "epoch": 2.15,
+ "learning_rate": "1.7023e-04",
+ "loss": 0.8023,
+ "slid_loss": 0.7944,
+ "step": 2242,
+ "time": 13.34
+ },
+ {
+ "epoch": 2.15,
+ "learning_rate": "1.7020e-04",
+ "loss": 0.8651,
+ "slid_loss": 0.7959,
+ "step": 2243,
+ "time": 13.52
+ },
+ {
+ "epoch": 2.16,
+ "learning_rate": "1.7017e-04",
+ "loss": 0.8172,
+ "slid_loss": 0.7961,
+ "step": 2244,
+ "time": 12.9
+ },
+ {
+ "epoch": 2.16,
+ "learning_rate": "1.7014e-04",
+ "loss": 0.8359,
+ "slid_loss": 0.7971,
+ "step": 2245,
+ "time": 12.85
+ },
+ {
+ "epoch": 2.16,
+ "learning_rate": "1.7011e-04",
+ "loss": 0.7136,
+ "slid_loss": 0.7967,
+ "step": 2246,
+ "time": 13.47
+ },
+ {
+ "epoch": 2.16,
+ "learning_rate": "1.7008e-04",
+ "loss": 0.7401,
+ "slid_loss": 0.7954,
+ "step": 2247,
+ "time": 13.65
+ },
+ {
+ "epoch": 2.16,
+ "learning_rate": "1.7004e-04",
+ "loss": 0.8518,
+ "slid_loss": 0.7964,
+ "step": 2248,
+ "time": 13.91
+ },
+ {
+ "epoch": 2.16,
+ "learning_rate": "1.7001e-04",
+ "loss": 0.7898,
+ "slid_loss": 0.7957,
+ "step": 2249,
+ "time": 13.07
+ },
+ {
+ "epoch": 2.16,
+ "learning_rate": "1.6998e-04",
+ "loss": 0.8022,
+ "slid_loss": 0.795,
+ "step": 2250,
+ "time": 13.7
+ },
+ {
+ "epoch": 2.16,
+ "learning_rate": "1.6995e-04",
+ "loss": 0.8057,
+ "slid_loss": 0.7949,
+ "step": 2251,
+ "time": 14.76
+ },
+ {
+ "epoch": 2.16,
+ "learning_rate": "1.6992e-04",
+ "loss": 0.7467,
+ "slid_loss": 0.7949,
+ "step": 2252,
+ "time": 13.65
+ },
+ {
+ "epoch": 2.16,
+ "learning_rate": "1.6989e-04",
+ "loss": 0.7295,
+ "slid_loss": 0.7941,
+ "step": 2253,
+ "time": 13.11
+ },
+ {
+ "epoch": 2.17,
+ "learning_rate": "1.6986e-04",
+ "loss": 0.8899,
+ "slid_loss": 0.7953,
+ "step": 2254,
+ "time": 11.4
+ },
+ {
+ "epoch": 2.17,
+ "learning_rate": "1.6983e-04",
+ "loss": 0.7359,
+ "slid_loss": 0.7953,
+ "step": 2255,
+ "time": 13.13
+ },
+ {
+ "epoch": 2.17,
+ "learning_rate": "1.6980e-04",
+ "loss": 0.8267,
+ "slid_loss": 0.7954,
+ "step": 2256,
+ "time": 14.08
+ },
+ {
+ "epoch": 2.17,
+ "learning_rate": "1.6977e-04",
+ "loss": 0.823,
+ "slid_loss": 0.7957,
+ "step": 2257,
+ "time": 13.38
+ },
+ {
+ "epoch": 2.17,
+ "learning_rate": "1.6974e-04",
+ "loss": 0.7864,
+ "slid_loss": 0.7955,
+ "step": 2258,
+ "time": 13.72
+ },
+ {
+ "epoch": 2.17,
+ "learning_rate": "1.6971e-04",
+ "loss": 0.8135,
+ "slid_loss": 0.7957,
+ "step": 2259,
+ "time": 12.36
+ },
+ {
+ "epoch": 2.17,
+ "learning_rate": "1.6968e-04",
+ "loss": 0.8588,
+ "slid_loss": 0.7971,
+ "step": 2260,
+ "time": 12.41
+ },
+ {
+ "epoch": 2.17,
+ "learning_rate": "1.6964e-04",
+ "loss": 0.9041,
+ "slid_loss": 0.7984,
+ "step": 2261,
+ "time": 13.84
+ },
+ {
+ "epoch": 2.17,
+ "learning_rate": "1.6961e-04",
+ "loss": 0.7461,
+ "slid_loss": 0.7981,
+ "step": 2262,
+ "time": 12.28
+ },
+ {
+ "epoch": 2.17,
+ "learning_rate": "1.6958e-04",
+ "loss": 0.783,
+ "slid_loss": 0.7981,
+ "step": 2263,
+ "time": 12.81
+ },
+ {
+ "epoch": 2.17,
+ "learning_rate": "1.6955e-04",
+ "loss": 0.8091,
+ "slid_loss": 0.7982,
+ "step": 2264,
+ "time": 13.98
+ },
+ {
+ "epoch": 2.18,
+ "learning_rate": "1.6952e-04",
+ "loss": 0.7466,
+ "slid_loss": 0.7984,
+ "step": 2265,
+ "time": 13.11
+ },
+ {
+ "epoch": 2.18,
+ "learning_rate": "1.6949e-04",
+ "loss": 0.815,
+ "slid_loss": 0.798,
+ "step": 2266,
+ "time": 14.01
+ },
+ {
+ "epoch": 2.18,
+ "learning_rate": "1.6946e-04",
+ "loss": 0.7752,
+ "slid_loss": 0.7972,
+ "step": 2267,
+ "time": 13.44
+ },
+ {
+ "epoch": 2.18,
+ "learning_rate": "1.6943e-04",
+ "loss": 0.8469,
+ "slid_loss": 0.7982,
+ "step": 2268,
+ "time": 11.55
+ },
+ {
+ "epoch": 2.18,
+ "learning_rate": "1.6940e-04",
+ "loss": 0.6493,
+ "slid_loss": 0.7957,
+ "step": 2269,
+ "time": 12.92
+ },
+ {
+ "epoch": 2.18,
+ "learning_rate": "1.6937e-04",
+ "loss": 0.9232,
+ "slid_loss": 0.7961,
+ "step": 2270,
+ "time": 13.65
+ },
+ {
+ "epoch": 2.18,
+ "learning_rate": "1.6934e-04",
+ "loss": 0.8403,
+ "slid_loss": 0.7977,
+ "step": 2271,
+ "time": 12.43
+ },
+ {
+ "epoch": 2.18,
+ "learning_rate": "1.6930e-04",
+ "loss": 0.7851,
+ "slid_loss": 0.7973,
+ "step": 2272,
+ "time": 11.77
+ },
+ {
+ "epoch": 2.18,
+ "learning_rate": "1.6927e-04",
+ "loss": 0.837,
+ "slid_loss": 0.7965,
+ "step": 2273,
+ "time": 13.64
+ },
+ {
+ "epoch": 2.18,
+ "learning_rate": "1.6924e-04",
+ "loss": 0.8088,
+ "slid_loss": 0.7959,
+ "step": 2274,
+ "time": 12.15
+ },
+ {
+ "epoch": 2.19,
+ "learning_rate": "1.6921e-04",
+ "loss": 0.8475,
+ "slid_loss": 0.7957,
+ "step": 2275,
+ "time": 12.3
+ },
+ {
+ "epoch": 2.19,
+ "learning_rate": "1.6918e-04",
+ "loss": 0.8388,
+ "slid_loss": 0.7973,
+ "step": 2276,
+ "time": 12.1
+ },
+ {
+ "epoch": 2.19,
+ "learning_rate": "1.6915e-04",
+ "loss": 0.6712,
+ "slid_loss": 0.7955,
+ "step": 2277,
+ "time": 13.59
+ },
+ {
+ "epoch": 2.19,
+ "learning_rate": "1.6912e-04",
+ "loss": 0.7821,
+ "slid_loss": 0.7953,
+ "step": 2278,
+ "time": 13.2
+ },
+ {
+ "epoch": 2.19,
+ "learning_rate": "1.6909e-04",
+ "loss": 0.7661,
+ "slid_loss": 0.7951,
+ "step": 2279,
+ "time": 13.25
+ },
+ {
+ "epoch": 2.19,
+ "learning_rate": "1.6906e-04",
+ "loss": 0.6763,
+ "slid_loss": 0.7936,
+ "step": 2280,
+ "time": 13.74
+ },
+ {
+ "epoch": 2.19,
+ "learning_rate": "1.6903e-04",
+ "loss": 0.8165,
+ "slid_loss": 0.7939,
+ "step": 2281,
+ "time": 13.52
+ },
+ {
+ "epoch": 2.19,
+ "learning_rate": "1.6899e-04",
+ "loss": 0.7703,
+ "slid_loss": 0.7932,
+ "step": 2282,
+ "time": 13.82
+ },
+ {
+ "epoch": 2.19,
+ "learning_rate": "1.6896e-04",
+ "loss": 0.7351,
+ "slid_loss": 0.7922,
+ "step": 2283,
+ "time": 11.87
+ },
+ {
+ "epoch": 2.19,
+ "learning_rate": "1.6893e-04",
+ "loss": 0.6802,
+ "slid_loss": 0.7907,
+ "step": 2284,
+ "time": 14.03
+ },
+ {
+ "epoch": 2.2,
+ "learning_rate": "1.6890e-04",
+ "loss": 0.8116,
+ "slid_loss": 0.7906,
+ "step": 2285,
+ "time": 11.56
+ },
+ {
+ "epoch": 2.2,
+ "learning_rate": "1.6887e-04",
+ "loss": 0.7986,
+ "slid_loss": 0.7901,
+ "step": 2286,
+ "time": 13.35
+ },
+ {
+ "epoch": 2.2,
+ "learning_rate": "1.6884e-04",
+ "loss": 0.867,
+ "slid_loss": 0.7897,
+ "step": 2287,
+ "time": 14.06
+ },
+ {
+ "epoch": 2.2,
+ "learning_rate": "1.6881e-04",
+ "loss": 0.7727,
+ "slid_loss": 0.7904,
+ "step": 2288,
+ "time": 12.77
+ },
+ {
+ "epoch": 2.2,
+ "learning_rate": "1.6878e-04",
+ "loss": 0.7279,
+ "slid_loss": 0.7901,
+ "step": 2289,
+ "time": 13.43
+ },
+ {
+ "epoch": 2.2,
+ "learning_rate": "1.6875e-04",
+ "loss": 0.8506,
+ "slid_loss": 0.7904,
+ "step": 2290,
+ "time": 13.79
+ },
+ {
+ "epoch": 2.2,
+ "learning_rate": "1.6872e-04",
+ "loss": 0.8056,
+ "slid_loss": 0.7903,
+ "step": 2291,
+ "time": 13.3
+ },
+ {
+ "epoch": 2.2,
+ "learning_rate": "1.6868e-04",
+ "loss": 0.8424,
+ "slid_loss": 0.7907,
+ "step": 2292,
+ "time": 13.4
+ },
+ {
+ "epoch": 2.2,
+ "learning_rate": "1.6865e-04",
+ "loss": 0.7595,
+ "slid_loss": 0.7906,
+ "step": 2293,
+ "time": 13.26
+ },
+ {
+ "epoch": 2.2,
+ "learning_rate": "1.6862e-04",
+ "loss": 0.695,
+ "slid_loss": 0.7888,
+ "step": 2294,
+ "time": 13.87
+ },
+ {
+ "epoch": 2.2,
+ "learning_rate": "1.6859e-04",
+ "loss": 0.7358,
+ "slid_loss": 0.7881,
+ "step": 2295,
+ "time": 12.29
+ },
+ {
+ "epoch": 2.21,
+ "learning_rate": "1.6856e-04",
+ "loss": 0.8463,
+ "slid_loss": 0.7892,
+ "step": 2296,
+ "time": 12.86
+ },
+ {
+ "epoch": 2.21,
+ "learning_rate": "1.6853e-04",
+ "loss": 0.7959,
+ "slid_loss": 0.7891,
+ "step": 2297,
+ "time": 11.48
+ },
+ {
+ "epoch": 2.21,
+ "learning_rate": "1.6850e-04",
+ "loss": 0.839,
+ "slid_loss": 0.789,
+ "step": 2298,
+ "time": 12.77
+ },
+ {
+ "epoch": 2.21,
+ "learning_rate": "1.6847e-04",
+ "loss": 0.8562,
+ "slid_loss": 0.7888,
+ "step": 2299,
+ "time": 13.41
+ },
+ {
+ "epoch": 2.21,
+ "learning_rate": "1.6843e-04",
+ "loss": 0.7578,
+ "slid_loss": 0.7899,
+ "step": 2300,
+ "time": 13.22
+ },
+ {
+ "epoch": 2.21,
+ "learning_rate": "1.6840e-04",
+ "loss": 0.7658,
+ "slid_loss": 0.7894,
+ "step": 2301,
+ "time": 11.97
+ },
+ {
+ "epoch": 2.21,
+ "learning_rate": "1.6837e-04",
+ "loss": 0.7181,
+ "slid_loss": 0.7878,
+ "step": 2302,
+ "time": 12.84
+ },
+ {
+ "epoch": 2.21,
+ "learning_rate": "1.6834e-04",
+ "loss": 0.8497,
+ "slid_loss": 0.7885,
+ "step": 2303,
+ "time": 13.41
+ },
+ {
+ "epoch": 2.21,
+ "learning_rate": "1.6831e-04",
+ "loss": 0.8063,
+ "slid_loss": 0.7882,
+ "step": 2304,
+ "time": 10.88
+ },
+ {
+ "epoch": 2.21,
+ "learning_rate": "1.6828e-04",
+ "loss": 0.7609,
+ "slid_loss": 0.7889,
+ "step": 2305,
+ "time": 13.8
+ },
+ {
+ "epoch": 2.22,
+ "learning_rate": "1.6825e-04",
+ "loss": 0.767,
+ "slid_loss": 0.7877,
+ "step": 2306,
+ "time": 10.76
+ },
+ {
+ "epoch": 2.22,
+ "learning_rate": "1.6822e-04",
+ "loss": 0.8057,
+ "slid_loss": 0.7886,
+ "step": 2307,
+ "time": 12.87
+ },
+ {
+ "epoch": 2.22,
+ "learning_rate": "1.6819e-04",
+ "loss": 0.8459,
+ "slid_loss": 0.7891,
+ "step": 2308,
+ "time": 13.4
+ },
+ {
+ "epoch": 2.22,
+ "learning_rate": "1.6815e-04",
+ "loss": 0.8029,
+ "slid_loss": 0.7894,
+ "step": 2309,
+ "time": 12.63
+ },
+ {
+ "epoch": 2.22,
+ "learning_rate": "1.6812e-04",
+ "loss": 0.783,
+ "slid_loss": 0.7889,
+ "step": 2310,
+ "time": 13.76
+ },
+ {
+ "epoch": 2.22,
+ "learning_rate": "1.6809e-04",
+ "loss": 0.7074,
+ "slid_loss": 0.788,
+ "step": 2311,
+ "time": 13.86
+ },
+ {
+ "epoch": 2.22,
+ "learning_rate": "1.6806e-04",
+ "loss": 0.8522,
+ "slid_loss": 0.7889,
+ "step": 2312,
+ "time": 14.04
+ },
+ {
+ "epoch": 2.22,
+ "learning_rate": "1.6803e-04",
+ "loss": 0.7792,
+ "slid_loss": 0.7887,
+ "step": 2313,
+ "time": 12.84
+ },
+ {
+ "epoch": 2.22,
+ "learning_rate": "1.6800e-04",
+ "loss": 0.7522,
+ "slid_loss": 0.7883,
+ "step": 2314,
+ "time": 13.65
+ },
+ {
+ "epoch": 2.22,
+ "learning_rate": "1.6797e-04",
+ "loss": 0.8422,
+ "slid_loss": 0.7889,
+ "step": 2315,
+ "time": 11.81
+ },
+ {
+ "epoch": 2.22,
+ "learning_rate": "1.6794e-04",
+ "loss": 0.8003,
+ "slid_loss": 0.7898,
+ "step": 2316,
+ "time": 13.99
+ },
+ {
+ "epoch": 2.23,
+ "learning_rate": "1.6790e-04",
+ "loss": 0.8567,
+ "slid_loss": 0.79,
+ "step": 2317,
+ "time": 12.72
+ },
+ {
+ "epoch": 2.23,
+ "learning_rate": "1.6787e-04",
+ "loss": 0.7813,
+ "slid_loss": 0.7898,
+ "step": 2318,
+ "time": 11.83
+ },
+ {
+ "epoch": 2.23,
+ "learning_rate": "1.6784e-04",
+ "loss": 0.8504,
+ "slid_loss": 0.7909,
+ "step": 2319,
+ "time": 13.67
+ },
+ {
+ "epoch": 2.23,
+ "learning_rate": "1.6781e-04",
+ "loss": 0.8548,
+ "slid_loss": 0.792,
+ "step": 2320,
+ "time": 13.38
+ },
+ {
+ "epoch": 2.23,
+ "learning_rate": "1.6778e-04",
+ "loss": 0.7591,
+ "slid_loss": 0.7924,
+ "step": 2321,
+ "time": 12.87
+ },
+ {
+ "epoch": 2.23,
+ "learning_rate": "1.6775e-04",
+ "loss": 0.7466,
+ "slid_loss": 0.7916,
+ "step": 2322,
+ "time": 12.48
+ },
+ {
+ "epoch": 2.23,
+ "learning_rate": "1.6772e-04",
+ "loss": 0.8087,
+ "slid_loss": 0.7918,
+ "step": 2323,
+ "time": 13.92
+ },
+ {
+ "epoch": 2.23,
+ "learning_rate": "1.6768e-04",
+ "loss": 0.7242,
+ "slid_loss": 0.7916,
+ "step": 2324,
+ "time": 12.92
+ },
+ {
+ "epoch": 2.23,
+ "learning_rate": "1.6765e-04",
+ "loss": 0.7763,
+ "slid_loss": 0.792,
+ "step": 2325,
+ "time": 13.39
+ },
+ {
+ "epoch": 2.23,
+ "learning_rate": "1.6762e-04",
+ "loss": 0.7529,
+ "slid_loss": 0.792,
+ "step": 2326,
+ "time": 13.01
+ },
+ {
+ "epoch": 2.24,
+ "learning_rate": "1.6759e-04",
+ "loss": 0.8644,
+ "slid_loss": 0.7923,
+ "step": 2327,
+ "time": 13.58
+ },
+ {
+ "epoch": 2.24,
+ "learning_rate": "1.6756e-04",
+ "loss": 0.7668,
+ "slid_loss": 0.7925,
+ "step": 2328,
+ "time": 11.64
+ },
+ {
+ "epoch": 2.24,
+ "learning_rate": "1.6753e-04",
+ "loss": 0.8452,
+ "slid_loss": 0.7929,
+ "step": 2329,
+ "time": 12.78
+ },
+ {
+ "epoch": 2.24,
+ "learning_rate": "1.6750e-04",
+ "loss": 0.7508,
+ "slid_loss": 0.7929,
+ "step": 2330,
+ "time": 12.93
+ },
+ {
+ "epoch": 2.24,
+ "learning_rate": "1.6746e-04",
+ "loss": 0.6384,
+ "slid_loss": 0.7911,
+ "step": 2331,
+ "time": 11.64
+ },
+ {
+ "epoch": 2.24,
+ "learning_rate": "1.6743e-04",
+ "loss": 0.776,
+ "slid_loss": 0.7913,
+ "step": 2332,
+ "time": 13.33
+ },
+ {
+ "epoch": 2.24,
+ "learning_rate": "1.6740e-04",
+ "loss": 0.8438,
+ "slid_loss": 0.7916,
+ "step": 2333,
+ "time": 12.15
+ },
+ {
+ "epoch": 2.24,
+ "learning_rate": "1.6737e-04",
+ "loss": 0.7734,
+ "slid_loss": 0.7911,
+ "step": 2334,
+ "time": 12.84
+ },
+ {
+ "epoch": 2.24,
+ "learning_rate": "1.6734e-04",
+ "loss": 0.7763,
+ "slid_loss": 0.7909,
+ "step": 2335,
+ "time": 13.36
+ },
+ {
+ "epoch": 2.24,
+ "learning_rate": "1.6731e-04",
+ "loss": 0.7231,
+ "slid_loss": 0.7903,
+ "step": 2336,
+ "time": 13.56
+ },
+ {
+ "epoch": 2.24,
+ "learning_rate": "1.6728e-04",
+ "loss": 0.8055,
+ "slid_loss": 0.7905,
+ "step": 2337,
+ "time": 12.52
+ },
+ {
+ "epoch": 2.25,
+ "learning_rate": "1.6724e-04",
+ "loss": 0.7972,
+ "slid_loss": 0.7908,
+ "step": 2338,
+ "time": 13.48
+ },
+ {
+ "epoch": 2.25,
+ "learning_rate": "1.6721e-04",
+ "loss": 0.7652,
+ "slid_loss": 0.7911,
+ "step": 2339,
+ "time": 13.69
+ },
+ {
+ "epoch": 2.25,
+ "learning_rate": "1.6718e-04",
+ "loss": 0.778,
+ "slid_loss": 0.7904,
+ "step": 2340,
+ "time": 12.3
+ },
+ {
+ "epoch": 2.25,
+ "learning_rate": "1.6715e-04",
+ "loss": 0.7265,
+ "slid_loss": 0.7903,
+ "step": 2341,
+ "time": 13.1
+ },
+ {
+ "epoch": 2.25,
+ "learning_rate": "1.6712e-04",
+ "loss": 0.8467,
+ "slid_loss": 0.7907,
+ "step": 2342,
+ "time": 14.03
+ },
+ {
+ "epoch": 2.25,
+ "learning_rate": "1.6709e-04",
+ "loss": 0.7228,
+ "slid_loss": 0.7893,
+ "step": 2343,
+ "time": 13.65
+ },
+ {
+ "epoch": 2.25,
+ "learning_rate": "1.6706e-04",
+ "loss": 0.7076,
+ "slid_loss": 0.7882,
+ "step": 2344,
+ "time": 12.87
+ },
+ {
+ "epoch": 2.25,
+ "learning_rate": "1.6702e-04",
+ "loss": 0.8349,
+ "slid_loss": 0.7882,
+ "step": 2345,
+ "time": 13.15
+ },
+ {
+ "epoch": 2.25,
+ "learning_rate": "1.6699e-04",
+ "loss": 0.8123,
+ "slid_loss": 0.7892,
+ "step": 2346,
+ "time": 13.2
+ },
+ {
+ "epoch": 2.25,
+ "learning_rate": "1.6696e-04",
+ "loss": 0.7886,
+ "slid_loss": 0.7897,
+ "step": 2347,
+ "time": 13.72
+ },
+ {
+ "epoch": 2.26,
+ "learning_rate": "1.6693e-04",
+ "loss": 0.7735,
+ "slid_loss": 0.7889,
+ "step": 2348,
+ "time": 11.02
+ },
+ {
+ "epoch": 2.26,
+ "learning_rate": "1.6690e-04",
+ "loss": 0.8651,
+ "slid_loss": 0.7897,
+ "step": 2349,
+ "time": 12.83
+ },
+ {
+ "epoch": 2.26,
+ "learning_rate": "1.6687e-04",
+ "loss": 0.7521,
+ "slid_loss": 0.7892,
+ "step": 2350,
+ "time": 12.67
+ },
+ {
+ "epoch": 2.26,
+ "learning_rate": "1.6683e-04",
+ "loss": 0.7954,
+ "slid_loss": 0.7891,
+ "step": 2351,
+ "time": 12.06
+ },
+ {
+ "epoch": 2.26,
+ "learning_rate": "1.6680e-04",
+ "loss": 0.8034,
+ "slid_loss": 0.7896,
+ "step": 2352,
+ "time": 12.2
+ },
+ {
+ "epoch": 2.26,
+ "learning_rate": "1.6677e-04",
+ "loss": 0.7668,
+ "slid_loss": 0.79,
+ "step": 2353,
+ "time": 13.38
+ },
+ {
+ "epoch": 2.26,
+ "learning_rate": "1.6674e-04",
+ "loss": 0.7665,
+ "slid_loss": 0.7888,
+ "step": 2354,
+ "time": 12.89
+ },
+ {
+ "epoch": 2.26,
+ "learning_rate": "1.6671e-04",
+ "loss": 0.7346,
+ "slid_loss": 0.7887,
+ "step": 2355,
+ "time": 13.14
+ },
+ {
+ "epoch": 2.26,
+ "learning_rate": "1.6668e-04",
+ "loss": 0.8758,
+ "slid_loss": 0.7892,
+ "step": 2356,
+ "time": 12.94
+ },
+ {
+ "epoch": 2.26,
+ "learning_rate": "1.6665e-04",
+ "loss": 0.8132,
+ "slid_loss": 0.7891,
+ "step": 2357,
+ "time": 13.35
+ },
+ {
+ "epoch": 2.27,
+ "learning_rate": "1.6661e-04",
+ "loss": 0.7162,
+ "slid_loss": 0.7884,
+ "step": 2358,
+ "time": 13.57
+ },
+ {
+ "epoch": 2.27,
+ "learning_rate": "1.6658e-04",
+ "loss": 0.8178,
+ "slid_loss": 0.7885,
+ "step": 2359,
+ "time": 11.26
+ },
+ {
+ "epoch": 2.27,
+ "learning_rate": "1.6655e-04",
+ "loss": 0.7531,
+ "slid_loss": 0.7874,
+ "step": 2360,
+ "time": 13.53
+ },
+ {
+ "epoch": 2.27,
+ "learning_rate": "1.6652e-04",
+ "loss": 0.8243,
+ "slid_loss": 0.7866,
+ "step": 2361,
+ "time": 13.28
+ },
+ {
+ "epoch": 2.27,
+ "learning_rate": "1.6649e-04",
+ "loss": 0.7408,
+ "slid_loss": 0.7866,
+ "step": 2362,
+ "time": 14.02
+ },
+ {
+ "epoch": 2.27,
+ "learning_rate": "1.6646e-04",
+ "loss": 0.7641,
+ "slid_loss": 0.7864,
+ "step": 2363,
+ "time": 14.03
+ },
+ {
+ "epoch": 2.27,
+ "learning_rate": "1.6642e-04",
+ "loss": 0.7551,
+ "slid_loss": 0.7858,
+ "step": 2364,
+ "time": 13.84
+ },
+ {
+ "epoch": 2.27,
+ "learning_rate": "1.6639e-04",
+ "loss": 0.8734,
+ "slid_loss": 0.7871,
+ "step": 2365,
+ "time": 11.24
+ },
+ {
+ "epoch": 2.27,
+ "learning_rate": "1.6636e-04",
+ "loss": 0.7956,
+ "slid_loss": 0.7869,
+ "step": 2366,
+ "time": 13.12
+ },
+ {
+ "epoch": 2.27,
+ "learning_rate": "1.6633e-04",
+ "loss": 0.7144,
+ "slid_loss": 0.7863,
+ "step": 2367,
+ "time": 11.58
+ },
+ {
+ "epoch": 2.27,
+ "learning_rate": "1.6630e-04",
+ "loss": 0.74,
+ "slid_loss": 0.7852,
+ "step": 2368,
+ "time": 13.49
+ },
+ {
+ "epoch": 2.28,
+ "learning_rate": "1.6627e-04",
+ "loss": 0.8097,
+ "slid_loss": 0.7868,
+ "step": 2369,
+ "time": 13.67
+ },
+ {
+ "epoch": 2.28,
+ "learning_rate": "1.6623e-04",
+ "loss": 0.7571,
+ "slid_loss": 0.7852,
+ "step": 2370,
+ "time": 13.92
+ },
+ {
+ "epoch": 2.28,
+ "learning_rate": "1.6620e-04",
+ "loss": 0.7708,
+ "slid_loss": 0.7845,
+ "step": 2371,
+ "time": 13.42
+ },
+ {
+ "epoch": 2.28,
+ "learning_rate": "1.6617e-04",
+ "loss": 0.8583,
+ "slid_loss": 0.7852,
+ "step": 2372,
+ "time": 13.91
+ },
+ {
+ "epoch": 2.28,
+ "learning_rate": "1.6614e-04",
+ "loss": 0.65,
+ "slid_loss": 0.7834,
+ "step": 2373,
+ "time": 12.26
+ },
+ {
+ "epoch": 2.28,
+ "learning_rate": "1.6611e-04",
+ "loss": 0.6606,
+ "slid_loss": 0.7819,
+ "step": 2374,
+ "time": 14.12
+ },
+ {
+ "epoch": 2.28,
+ "learning_rate": "1.6607e-04",
+ "loss": 0.7866,
+ "slid_loss": 0.7813,
+ "step": 2375,
+ "time": 13.72
+ },
+ {
+ "epoch": 2.28,
+ "learning_rate": "1.6604e-04",
+ "loss": 0.7201,
+ "slid_loss": 0.7801,
+ "step": 2376,
+ "time": 12.76
+ },
+ {
+ "epoch": 2.28,
+ "learning_rate": "1.6601e-04",
+ "loss": 0.7036,
+ "slid_loss": 0.7804,
+ "step": 2377,
+ "time": 11.71
+ },
+ {
+ "epoch": 2.28,
+ "learning_rate": "1.6598e-04",
+ "loss": 0.6641,
+ "slid_loss": 0.7792,
+ "step": 2378,
+ "time": 14.01
+ },
+ {
+ "epoch": 2.29,
+ "learning_rate": "1.6595e-04",
+ "loss": 0.707,
+ "slid_loss": 0.7786,
+ "step": 2379,
+ "time": 12.15
+ },
+ {
+ "epoch": 2.29,
+ "learning_rate": "1.6592e-04",
+ "loss": 0.7834,
+ "slid_loss": 0.7797,
+ "step": 2380,
+ "time": 13.33
+ },
+ {
+ "epoch": 2.29,
+ "learning_rate": "1.6588e-04",
+ "loss": 0.719,
+ "slid_loss": 0.7787,
+ "step": 2381,
+ "time": 13.88
+ },
+ {
+ "epoch": 2.29,
+ "learning_rate": "1.6585e-04",
+ "loss": 0.8067,
+ "slid_loss": 0.7791,
+ "step": 2382,
+ "time": 15.35
+ },
+ {
+ "epoch": 2.29,
+ "learning_rate": "1.6582e-04",
+ "loss": 0.8457,
+ "slid_loss": 0.7802,
+ "step": 2383,
+ "time": 12.92
+ },
+ {
+ "epoch": 2.29,
+ "learning_rate": "1.6579e-04",
+ "loss": 0.7335,
+ "slid_loss": 0.7807,
+ "step": 2384,
+ "time": 14.3
+ },
+ {
+ "epoch": 2.29,
+ "learning_rate": "1.6576e-04",
+ "loss": 0.7939,
+ "slid_loss": 0.7806,
+ "step": 2385,
+ "time": 12.65
+ },
+ {
+ "epoch": 2.29,
+ "learning_rate": "1.6572e-04",
+ "loss": 0.8367,
+ "slid_loss": 0.7809,
+ "step": 2386,
+ "time": 13.37
+ },
+ {
+ "epoch": 2.29,
+ "learning_rate": "1.6569e-04",
+ "loss": 0.7839,
+ "slid_loss": 0.7801,
+ "step": 2387,
+ "time": 13.19
+ },
+ {
+ "epoch": 2.29,
+ "learning_rate": "1.6566e-04",
+ "loss": 0.6895,
+ "slid_loss": 0.7793,
+ "step": 2388,
+ "time": 12.85
+ },
+ {
+ "epoch": 2.29,
+ "learning_rate": "1.6563e-04",
+ "loss": 0.7283,
+ "slid_loss": 0.7793,
+ "step": 2389,
+ "time": 12.35
+ },
+ {
+ "epoch": 2.3,
+ "learning_rate": "1.6560e-04",
+ "loss": 0.7126,
+ "slid_loss": 0.7779,
+ "step": 2390,
+ "time": 13.61
+ },
+ {
+ "epoch": 2.3,
+ "learning_rate": "1.6557e-04",
+ "loss": 0.7547,
+ "slid_loss": 0.7774,
+ "step": 2391,
+ "time": 12.68
+ },
+ {
+ "epoch": 2.3,
+ "learning_rate": "1.6553e-04",
+ "loss": 0.9244,
+ "slid_loss": 0.7782,
+ "step": 2392,
+ "time": 13.96
+ },
+ {
+ "epoch": 2.3,
+ "learning_rate": "1.6550e-04",
+ "loss": 0.8088,
+ "slid_loss": 0.7787,
+ "step": 2393,
+ "time": 13.87
+ },
+ {
+ "epoch": 2.3,
+ "learning_rate": "1.6547e-04",
+ "loss": 0.7744,
+ "slid_loss": 0.7795,
+ "step": 2394,
+ "time": 12.89
+ },
+ {
+ "epoch": 2.3,
+ "learning_rate": "1.6544e-04",
+ "loss": 0.6976,
+ "slid_loss": 0.7791,
+ "step": 2395,
+ "time": 13.27
+ },
+ {
+ "epoch": 2.3,
+ "learning_rate": "1.6541e-04",
+ "loss": 0.7995,
+ "slid_loss": 0.7786,
+ "step": 2396,
+ "time": 13.11
+ },
+ {
+ "epoch": 2.3,
+ "learning_rate": "1.6537e-04",
+ "loss": 0.78,
+ "slid_loss": 0.7785,
+ "step": 2397,
+ "time": 13.87
+ },
+ {
+ "epoch": 2.3,
+ "learning_rate": "1.6534e-04",
+ "loss": 0.7534,
+ "slid_loss": 0.7776,
+ "step": 2398,
+ "time": 12.73
+ },
+ {
+ "epoch": 2.3,
+ "learning_rate": "1.6531e-04",
+ "loss": 0.749,
+ "slid_loss": 0.7766,
+ "step": 2399,
+ "time": 13.4
+ },
+ {
+ "epoch": 2.31,
+ "learning_rate": "1.6528e-04",
+ "loss": 0.8434,
+ "slid_loss": 0.7774,
+ "step": 2400,
+ "time": 13.12
+ },
+ {
+ "epoch": 2.31,
+ "learning_rate": "1.6525e-04",
+ "loss": 0.8167,
+ "slid_loss": 0.7779,
+ "step": 2401,
+ "time": 11.93
+ },
+ {
+ "epoch": 2.31,
+ "learning_rate": "1.6521e-04",
+ "loss": 0.7369,
+ "slid_loss": 0.7781,
+ "step": 2402,
+ "time": 13.65
+ },
+ {
+ "epoch": 2.31,
+ "learning_rate": "1.6518e-04",
+ "loss": 0.7046,
+ "slid_loss": 0.7767,
+ "step": 2403,
+ "time": 13.19
+ },
+ {
+ "epoch": 2.31,
+ "learning_rate": "1.6515e-04",
+ "loss": 0.8158,
+ "slid_loss": 0.7767,
+ "step": 2404,
+ "time": 13.11
+ },
+ {
+ "epoch": 2.31,
+ "learning_rate": "1.6512e-04",
+ "loss": 0.8183,
+ "slid_loss": 0.7773,
+ "step": 2405,
+ "time": 13.58
+ },
+ {
+ "epoch": 2.31,
+ "learning_rate": "1.6509e-04",
+ "loss": 0.7396,
+ "slid_loss": 0.777,
+ "step": 2406,
+ "time": 13.9
+ },
+ {
+ "epoch": 2.31,
+ "learning_rate": "1.6505e-04",
+ "loss": 0.8199,
+ "slid_loss": 0.7772,
+ "step": 2407,
+ "time": 14.2
+ },
+ {
+ "epoch": 2.31,
+ "learning_rate": "1.6502e-04",
+ "loss": 0.7895,
+ "slid_loss": 0.7766,
+ "step": 2408,
+ "time": 12.91
+ },
+ {
+ "epoch": 2.31,
+ "learning_rate": "1.6499e-04",
+ "loss": 0.7928,
+ "slid_loss": 0.7765,
+ "step": 2409,
+ "time": 13.44
+ },
+ {
+ "epoch": 2.32,
+ "learning_rate": "1.6496e-04",
+ "loss": 0.7698,
+ "slid_loss": 0.7764,
+ "step": 2410,
+ "time": 14.13
+ },
+ {
+ "epoch": 2.32,
+ "learning_rate": "1.6493e-04",
+ "loss": 0.8022,
+ "slid_loss": 0.7773,
+ "step": 2411,
+ "time": 11.31
+ },
+ {
+ "epoch": 2.32,
+ "learning_rate": "1.6489e-04",
+ "loss": 0.7736,
+ "slid_loss": 0.7766,
+ "step": 2412,
+ "time": 12.9
+ },
+ {
+ "epoch": 2.32,
+ "learning_rate": "1.6486e-04",
+ "loss": 0.783,
+ "slid_loss": 0.7766,
+ "step": 2413,
+ "time": 12.88
+ },
+ {
+ "epoch": 2.32,
+ "learning_rate": "1.6483e-04",
+ "loss": 0.7696,
+ "slid_loss": 0.7768,
+ "step": 2414,
+ "time": 12.81
+ },
+ {
+ "epoch": 2.32,
+ "learning_rate": "1.6480e-04",
+ "loss": 0.6847,
+ "slid_loss": 0.7752,
+ "step": 2415,
+ "time": 13.18
+ },
+ {
+ "epoch": 2.32,
+ "learning_rate": "1.6477e-04",
+ "loss": 0.7745,
+ "slid_loss": 0.7749,
+ "step": 2416,
+ "time": 13.22
+ },
+ {
+ "epoch": 2.32,
+ "learning_rate": "1.6473e-04",
+ "loss": 0.8408,
+ "slid_loss": 0.7748,
+ "step": 2417,
+ "time": 13.21
+ },
+ {
+ "epoch": 2.32,
+ "learning_rate": "1.6470e-04",
+ "loss": 0.6568,
+ "slid_loss": 0.7735,
+ "step": 2418,
+ "time": 12.92
+ },
+ {
+ "epoch": 2.32,
+ "learning_rate": "1.6467e-04",
+ "loss": 0.8376,
+ "slid_loss": 0.7734,
+ "step": 2419,
+ "time": 12.87
+ },
+ {
+ "epoch": 2.32,
+ "learning_rate": "1.6464e-04",
+ "loss": 0.7695,
+ "slid_loss": 0.7726,
+ "step": 2420,
+ "time": 12.99
+ },
+ {
+ "epoch": 2.33,
+ "learning_rate": "1.6461e-04",
+ "loss": 0.6859,
+ "slid_loss": 0.7718,
+ "step": 2421,
+ "time": 12.27
+ },
+ {
+ "epoch": 2.33,
+ "learning_rate": "1.6457e-04",
+ "loss": 0.7864,
+ "slid_loss": 0.7722,
+ "step": 2422,
+ "time": 13.59
+ },
+ {
+ "epoch": 2.33,
+ "learning_rate": "1.6454e-04",
+ "loss": 0.7615,
+ "slid_loss": 0.7717,
+ "step": 2423,
+ "time": 13.63
+ },
+ {
+ "epoch": 2.33,
+ "learning_rate": "1.6451e-04",
+ "loss": 0.6796,
+ "slid_loss": 0.7713,
+ "step": 2424,
+ "time": 13.85
+ },
+ {
+ "epoch": 2.33,
+ "learning_rate": "1.6448e-04",
+ "loss": 0.7267,
+ "slid_loss": 0.7708,
+ "step": 2425,
+ "time": 13.24
+ },
+ {
+ "epoch": 2.33,
+ "learning_rate": "1.6445e-04",
+ "loss": 0.6368,
+ "slid_loss": 0.7696,
+ "step": 2426,
+ "time": 13.12
+ },
+ {
+ "epoch": 2.33,
+ "learning_rate": "1.6441e-04",
+ "loss": 0.728,
+ "slid_loss": 0.7683,
+ "step": 2427,
+ "time": 13.27
+ },
+ {
+ "epoch": 2.33,
+ "learning_rate": "1.6438e-04",
+ "loss": 0.8599,
+ "slid_loss": 0.7692,
+ "step": 2428,
+ "time": 13.34
+ },
+ {
+ "epoch": 2.33,
+ "learning_rate": "1.6435e-04",
+ "loss": 0.7962,
+ "slid_loss": 0.7687,
+ "step": 2429,
+ "time": 13.97
+ },
+ {
+ "epoch": 2.33,
+ "learning_rate": "1.6432e-04",
+ "loss": 0.7078,
+ "slid_loss": 0.7683,
+ "step": 2430,
+ "time": 12.85
+ },
+ {
+ "epoch": 2.34,
+ "learning_rate": "1.6429e-04",
+ "loss": 0.7533,
+ "slid_loss": 0.7694,
+ "step": 2431,
+ "time": 13.73
+ },
+ {
+ "epoch": 2.34,
+ "learning_rate": "1.6425e-04",
+ "loss": 0.7104,
+ "slid_loss": 0.7688,
+ "step": 2432,
+ "time": 13.94
+ },
+ {
+ "epoch": 2.34,
+ "learning_rate": "1.6422e-04",
+ "loss": 0.7481,
+ "slid_loss": 0.7678,
+ "step": 2433,
+ "time": 13.45
+ },
+ {
+ "epoch": 2.34,
+ "learning_rate": "1.6419e-04",
+ "loss": 0.7471,
+ "slid_loss": 0.7676,
+ "step": 2434,
+ "time": 13.52
+ },
+ {
+ "epoch": 2.34,
+ "learning_rate": "1.6416e-04",
+ "loss": 0.7693,
+ "slid_loss": 0.7675,
+ "step": 2435,
+ "time": 13.86
+ },
+ {
+ "epoch": 2.34,
+ "learning_rate": "1.6412e-04",
+ "loss": 0.843,
+ "slid_loss": 0.7687,
+ "step": 2436,
+ "time": 13.61
+ },
+ {
+ "epoch": 2.34,
+ "learning_rate": "1.6409e-04",
+ "loss": 0.776,
+ "slid_loss": 0.7684,
+ "step": 2437,
+ "time": 12.81
+ },
+ {
+ "epoch": 2.34,
+ "learning_rate": "1.6406e-04",
+ "loss": 0.8103,
+ "slid_loss": 0.7685,
+ "step": 2438,
+ "time": 13.75
+ },
+ {
+ "epoch": 2.34,
+ "learning_rate": "1.6403e-04",
+ "loss": 0.823,
+ "slid_loss": 0.7691,
+ "step": 2439,
+ "time": 13.67
+ },
+ {
+ "epoch": 2.34,
+ "learning_rate": "1.6400e-04",
+ "loss": 0.7896,
+ "slid_loss": 0.7692,
+ "step": 2440,
+ "time": 12.99
+ },
+ {
+ "epoch": 2.34,
+ "learning_rate": "1.6396e-04",
+ "loss": 0.8034,
+ "slid_loss": 0.77,
+ "step": 2441,
+ "time": 13.27
+ },
+ {
+ "epoch": 2.35,
+ "learning_rate": "1.6393e-04",
+ "loss": 0.7983,
+ "slid_loss": 0.7695,
+ "step": 2442,
+ "time": 13.22
+ },
+ {
+ "epoch": 2.35,
+ "learning_rate": "1.6390e-04",
+ "loss": 0.7707,
+ "slid_loss": 0.77,
+ "step": 2443,
+ "time": 14.22
+ },
+ {
+ "epoch": 2.35,
+ "learning_rate": "1.6387e-04",
+ "loss": 0.7461,
+ "slid_loss": 0.7704,
+ "step": 2444,
+ "time": 11.59
+ },
+ {
+ "epoch": 2.35,
+ "learning_rate": "1.6383e-04",
+ "loss": 0.7276,
+ "slid_loss": 0.7693,
+ "step": 2445,
+ "time": 11.76
+ },
+ {
+ "epoch": 2.35,
+ "learning_rate": "1.6380e-04",
+ "loss": 0.7686,
+ "slid_loss": 0.7689,
+ "step": 2446,
+ "time": 14.17
+ },
+ {
+ "epoch": 2.35,
+ "learning_rate": "1.6377e-04",
+ "loss": 0.8094,
+ "slid_loss": 0.7691,
+ "step": 2447,
+ "time": 13.4
+ },
+ {
+ "epoch": 2.35,
+ "learning_rate": "1.6374e-04",
+ "loss": 0.8325,
+ "slid_loss": 0.7697,
+ "step": 2448,
+ "time": 13.94
+ },
+ {
+ "epoch": 2.35,
+ "learning_rate": "1.6371e-04",
+ "loss": 0.8013,
+ "slid_loss": 0.769,
+ "step": 2449,
+ "time": 13.46
+ },
+ {
+ "epoch": 2.35,
+ "learning_rate": "1.6367e-04",
+ "loss": 0.7921,
+ "slid_loss": 0.7694,
+ "step": 2450,
+ "time": 12.91
+ },
+ {
+ "epoch": 2.35,
+ "learning_rate": "1.6364e-04",
+ "loss": 0.8464,
+ "slid_loss": 0.7699,
+ "step": 2451,
+ "time": 12.76
+ },
+ {
+ "epoch": 2.36,
+ "learning_rate": "1.6361e-04",
+ "loss": 0.7586,
+ "slid_loss": 0.7695,
+ "step": 2452,
+ "time": 12.79
+ },
+ {
+ "epoch": 2.36,
+ "learning_rate": "1.6358e-04",
+ "loss": 0.7911,
+ "slid_loss": 0.7697,
+ "step": 2453,
+ "time": 13.54
+ },
+ {
+ "epoch": 2.36,
+ "learning_rate": "1.6354e-04",
+ "loss": 0.7466,
+ "slid_loss": 0.7695,
+ "step": 2454,
+ "time": 13.21
+ },
+ {
+ "epoch": 2.36,
+ "learning_rate": "1.6351e-04",
+ "loss": 0.773,
+ "slid_loss": 0.7699,
+ "step": 2455,
+ "time": 11.56
+ },
+ {
+ "epoch": 2.36,
+ "learning_rate": "1.6348e-04",
+ "loss": 0.7781,
+ "slid_loss": 0.7689,
+ "step": 2456,
+ "time": 11.76
+ },
+ {
+ "epoch": 2.36,
+ "learning_rate": "1.6345e-04",
+ "loss": 0.718,
+ "slid_loss": 0.768,
+ "step": 2457,
+ "time": 12.81
+ },
+ {
+ "epoch": 2.36,
+ "learning_rate": "1.6342e-04",
+ "loss": 0.7654,
+ "slid_loss": 0.7685,
+ "step": 2458,
+ "time": 14.09
+ },
+ {
+ "epoch": 2.36,
+ "learning_rate": "1.6338e-04",
+ "loss": 0.7935,
+ "slid_loss": 0.7682,
+ "step": 2459,
+ "time": 11.66
+ },
+ {
+ "epoch": 2.36,
+ "learning_rate": "1.6335e-04",
+ "loss": 0.7984,
+ "slid_loss": 0.7687,
+ "step": 2460,
+ "time": 13.9
+ },
+ {
+ "epoch": 2.36,
+ "learning_rate": "1.6332e-04",
+ "loss": 0.9206,
+ "slid_loss": 0.7696,
+ "step": 2461,
+ "time": 12.22
+ },
+ {
+ "epoch": 2.37,
+ "learning_rate": "1.6329e-04",
+ "loss": 0.7754,
+ "slid_loss": 0.77,
+ "step": 2462,
+ "time": 11.66
+ },
+ {
+ "epoch": 2.37,
+ "learning_rate": "1.6325e-04",
+ "loss": 0.6438,
+ "slid_loss": 0.7688,
+ "step": 2463,
+ "time": 12.09
+ },
+ {
+ "epoch": 2.37,
+ "learning_rate": "1.6322e-04",
+ "loss": 0.7966,
+ "slid_loss": 0.7692,
+ "step": 2464,
+ "time": 13.54
+ },
+ {
+ "epoch": 2.37,
+ "learning_rate": "1.6319e-04",
+ "loss": 0.7955,
+ "slid_loss": 0.7684,
+ "step": 2465,
+ "time": 13.7
+ },
+ {
+ "epoch": 2.37,
+ "learning_rate": "1.6316e-04",
+ "loss": 0.7837,
+ "slid_loss": 0.7683,
+ "step": 2466,
+ "time": 13.58
+ },
+ {
+ "epoch": 2.37,
+ "learning_rate": "1.6312e-04",
+ "loss": 0.7989,
+ "slid_loss": 0.7691,
+ "step": 2467,
+ "time": 11.4
+ },
+ {
+ "epoch": 2.37,
+ "learning_rate": "1.6309e-04",
+ "loss": 0.6558,
+ "slid_loss": 0.7683,
+ "step": 2468,
+ "time": 13.77
+ },
+ {
+ "epoch": 2.37,
+ "learning_rate": "1.6306e-04",
+ "loss": 0.7811,
+ "slid_loss": 0.768,
+ "step": 2469,
+ "time": 11.42
+ },
+ {
+ "epoch": 2.37,
+ "learning_rate": "1.6303e-04",
+ "loss": 0.8034,
+ "slid_loss": 0.7685,
+ "step": 2470,
+ "time": 12.5
+ },
+ {
+ "epoch": 2.37,
+ "learning_rate": "1.6299e-04",
+ "loss": 0.8127,
+ "slid_loss": 0.7689,
+ "step": 2471,
+ "time": 11.92
+ },
+ {
+ "epoch": 2.37,
+ "learning_rate": "1.6296e-04",
+ "loss": 0.7403,
+ "slid_loss": 0.7677,
+ "step": 2472,
+ "time": 13.83
+ },
+ {
+ "epoch": 2.38,
+ "learning_rate": "1.6293e-04",
+ "loss": 0.6105,
+ "slid_loss": 0.7673,
+ "step": 2473,
+ "time": 12.73
+ },
+ {
+ "epoch": 2.38,
+ "learning_rate": "1.6290e-04",
+ "loss": 0.8256,
+ "slid_loss": 0.769,
+ "step": 2474,
+ "time": 14.34
+ },
+ {
+ "epoch": 2.38,
+ "learning_rate": "1.6287e-04",
+ "loss": 0.7896,
+ "slid_loss": 0.769,
+ "step": 2475,
+ "time": 13.15
+ },
+ {
+ "epoch": 2.38,
+ "learning_rate": "1.6283e-04",
+ "loss": 0.7023,
+ "slid_loss": 0.7688,
+ "step": 2476,
+ "time": 13.3
+ },
+ {
+ "epoch": 2.38,
+ "learning_rate": "1.6280e-04",
+ "loss": 0.7481,
+ "slid_loss": 0.7693,
+ "step": 2477,
+ "time": 13.23
+ },
+ {
+ "epoch": 2.38,
+ "learning_rate": "1.6277e-04",
+ "loss": 0.7926,
+ "slid_loss": 0.7706,
+ "step": 2478,
+ "time": 13.0
+ },
+ {
+ "epoch": 2.38,
+ "learning_rate": "1.6274e-04",
+ "loss": 0.6504,
+ "slid_loss": 0.77,
+ "step": 2479,
+ "time": 12.52
+ },
+ {
+ "epoch": 2.38,
+ "learning_rate": "1.6270e-04",
+ "loss": 0.7893,
+ "slid_loss": 0.7701,
+ "step": 2480,
+ "time": 13.77
+ },
+ {
+ "epoch": 2.38,
+ "learning_rate": "1.6267e-04",
+ "loss": 0.8186,
+ "slid_loss": 0.771,
+ "step": 2481,
+ "time": 13.03
+ },
+ {
+ "epoch": 2.38,
+ "learning_rate": "1.6264e-04",
+ "loss": 0.893,
+ "slid_loss": 0.7719,
+ "step": 2482,
+ "time": 13.02
+ },
+ {
+ "epoch": 2.39,
+ "learning_rate": "1.6261e-04",
+ "loss": 0.8132,
+ "slid_loss": 0.7716,
+ "step": 2483,
+ "time": 12.39
+ },
+ {
+ "epoch": 2.39,
+ "learning_rate": "1.6257e-04",
+ "loss": 0.776,
+ "slid_loss": 0.772,
+ "step": 2484,
+ "time": 13.84
+ },
+ {
+ "epoch": 2.39,
+ "learning_rate": "1.6254e-04",
+ "loss": 0.802,
+ "slid_loss": 0.7721,
+ "step": 2485,
+ "time": 13.25
+ },
+ {
+ "epoch": 2.39,
+ "learning_rate": "1.6251e-04",
+ "loss": 0.7807,
+ "slid_loss": 0.7715,
+ "step": 2486,
+ "time": 12.99
+ },
+ {
+ "epoch": 2.39,
+ "learning_rate": "1.6248e-04",
+ "loss": 0.7731,
+ "slid_loss": 0.7714,
+ "step": 2487,
+ "time": 11.42
+ },
+ {
+ "epoch": 2.39,
+ "learning_rate": "1.6244e-04",
+ "loss": 0.846,
+ "slid_loss": 0.773,
+ "step": 2488,
+ "time": 13.81
+ },
+ {
+ "epoch": 2.39,
+ "learning_rate": "1.6241e-04",
+ "loss": 0.7734,
+ "slid_loss": 0.7734,
+ "step": 2489,
+ "time": 11.75
+ },
+ {
+ "epoch": 2.39,
+ "learning_rate": "1.6238e-04",
+ "loss": 0.712,
+ "slid_loss": 0.7734,
+ "step": 2490,
+ "time": 13.24
+ },
+ {
+ "epoch": 2.39,
+ "learning_rate": "1.6235e-04",
+ "loss": 0.8626,
+ "slid_loss": 0.7745,
+ "step": 2491,
+ "time": 12.19
+ },
+ {
+ "epoch": 2.39,
+ "learning_rate": "1.6231e-04",
+ "loss": 0.7459,
+ "slid_loss": 0.7727,
+ "step": 2492,
+ "time": 12.83
+ },
+ {
+ "epoch": 2.39,
+ "learning_rate": "1.6228e-04",
+ "loss": 0.7971,
+ "slid_loss": 0.7726,
+ "step": 2493,
+ "time": 13.2
+ },
+ {
+ "epoch": 2.4,
+ "learning_rate": "1.6225e-04",
+ "loss": 0.7954,
+ "slid_loss": 0.7728,
+ "step": 2494,
+ "time": 12.89
+ },
+ {
+ "epoch": 2.4,
+ "learning_rate": "1.6222e-04",
+ "loss": 0.7523,
+ "slid_loss": 0.7734,
+ "step": 2495,
+ "time": 11.43
+ },
+ {
+ "epoch": 2.4,
+ "learning_rate": "1.6218e-04",
+ "loss": 0.7914,
+ "slid_loss": 0.7733,
+ "step": 2496,
+ "time": 13.32
+ },
+ {
+ "epoch": 2.4,
+ "learning_rate": "1.6215e-04",
+ "loss": 0.7158,
+ "slid_loss": 0.7726,
+ "step": 2497,
+ "time": 13.42
+ },
+ {
+ "epoch": 2.4,
+ "learning_rate": "1.6212e-04",
+ "loss": 0.8404,
+ "slid_loss": 0.7735,
+ "step": 2498,
+ "time": 12.83
+ },
+ {
+ "epoch": 2.4,
+ "learning_rate": "1.6209e-04",
+ "loss": 0.8212,
+ "slid_loss": 0.7742,
+ "step": 2499,
+ "time": 13.79
+ },
+ {
+ "epoch": 2.4,
+ "learning_rate": "1.6205e-04",
+ "loss": 0.8013,
+ "slid_loss": 0.7738,
+ "step": 2500,
+ "time": 12.94
+ },
+ {
+ "epoch": 2.4,
+ "learning_rate": "1.6202e-04",
+ "loss": 0.7077,
+ "slid_loss": 0.7727,
+ "step": 2501,
+ "time": 13.25
+ },
+ {
+ "epoch": 2.4,
+ "learning_rate": "1.6199e-04",
+ "loss": 0.7785,
+ "slid_loss": 0.7731,
+ "step": 2502,
+ "time": 11.4
+ },
+ {
+ "epoch": 2.4,
+ "learning_rate": "1.6196e-04",
+ "loss": 0.7267,
+ "slid_loss": 0.7734,
+ "step": 2503,
+ "time": 13.52
+ },
+ {
+ "epoch": 2.41,
+ "learning_rate": "1.6192e-04",
+ "loss": 0.8219,
+ "slid_loss": 0.7734,
+ "step": 2504,
+ "time": 11.8
+ },
+ {
+ "epoch": 2.41,
+ "learning_rate": "1.6189e-04",
+ "loss": 0.6668,
+ "slid_loss": 0.7719,
+ "step": 2505,
+ "time": 13.76
+ },
+ {
+ "epoch": 2.41,
+ "learning_rate": "1.6186e-04",
+ "loss": 0.7414,
+ "slid_loss": 0.7719,
+ "step": 2506,
+ "time": 13.7
+ },
+ {
+ "epoch": 2.41,
+ "learning_rate": "1.6183e-04",
+ "loss": 0.6569,
+ "slid_loss": 0.7703,
+ "step": 2507,
+ "time": 13.13
+ },
+ {
+ "epoch": 2.41,
+ "learning_rate": "1.6179e-04",
+ "loss": 0.7284,
+ "slid_loss": 0.7697,
+ "step": 2508,
+ "time": 13.0
+ },
+ {
+ "epoch": 2.41,
+ "learning_rate": "1.6176e-04",
+ "loss": 0.7826,
+ "slid_loss": 0.7696,
+ "step": 2509,
+ "time": 13.34
+ },
+ {
+ "epoch": 2.41,
+ "learning_rate": "1.6173e-04",
+ "loss": 0.7546,
+ "slid_loss": 0.7694,
+ "step": 2510,
+ "time": 13.78
+ },
+ {
+ "epoch": 2.41,
+ "learning_rate": "1.6170e-04",
+ "loss": 0.7023,
+ "slid_loss": 0.7684,
+ "step": 2511,
+ "time": 13.73
+ },
+ {
+ "epoch": 2.41,
+ "learning_rate": "1.6166e-04",
+ "loss": 0.776,
+ "slid_loss": 0.7685,
+ "step": 2512,
+ "time": 13.21
+ },
+ {
+ "epoch": 2.41,
+ "learning_rate": "1.6163e-04",
+ "loss": 0.6859,
+ "slid_loss": 0.7675,
+ "step": 2513,
+ "time": 13.17
+ },
+ {
+ "epoch": 2.41,
+ "learning_rate": "1.6160e-04",
+ "loss": 0.7071,
+ "slid_loss": 0.7669,
+ "step": 2514,
+ "time": 12.72
+ },
+ {
+ "epoch": 2.42,
+ "learning_rate": "1.6156e-04",
+ "loss": 0.7482,
+ "slid_loss": 0.7675,
+ "step": 2515,
+ "time": 13.44
+ },
+ {
+ "epoch": 2.42,
+ "learning_rate": "1.6153e-04",
+ "loss": 0.6813,
+ "slid_loss": 0.7666,
+ "step": 2516,
+ "time": 13.6
+ },
+ {
+ "epoch": 2.42,
+ "learning_rate": "1.6150e-04",
+ "loss": 0.7776,
+ "slid_loss": 0.7659,
+ "step": 2517,
+ "time": 14.03
+ },
+ {
+ "epoch": 2.42,
+ "learning_rate": "1.6147e-04",
+ "loss": 0.7137,
+ "slid_loss": 0.7665,
+ "step": 2518,
+ "time": 12.07
+ },
+ {
+ "epoch": 2.42,
+ "learning_rate": "1.6143e-04",
+ "loss": 0.7073,
+ "slid_loss": 0.7652,
+ "step": 2519,
+ "time": 13.14
+ },
+ {
+ "epoch": 2.42,
+ "learning_rate": "1.6140e-04",
+ "loss": 0.7791,
+ "slid_loss": 0.7653,
+ "step": 2520,
+ "time": 13.7
+ },
+ {
+ "epoch": 2.42,
+ "learning_rate": "1.6137e-04",
+ "loss": 0.7411,
+ "slid_loss": 0.7658,
+ "step": 2521,
+ "time": 13.34
+ },
+ {
+ "epoch": 2.42,
+ "learning_rate": "1.6134e-04",
+ "loss": 0.793,
+ "slid_loss": 0.7659,
+ "step": 2522,
+ "time": 13.55
+ },
+ {
+ "epoch": 2.42,
+ "learning_rate": "1.6130e-04",
+ "loss": 0.805,
+ "slid_loss": 0.7663,
+ "step": 2523,
+ "time": 13.16
+ },
+ {
+ "epoch": 2.42,
+ "learning_rate": "1.6127e-04",
+ "loss": 0.726,
+ "slid_loss": 0.7668,
+ "step": 2524,
+ "time": 12.54
+ },
+ {
+ "epoch": 2.43,
+ "learning_rate": "1.6124e-04",
+ "loss": 0.7142,
+ "slid_loss": 0.7667,
+ "step": 2525,
+ "time": 13.24
+ },
+ {
+ "epoch": 2.43,
+ "learning_rate": "1.6121e-04",
+ "loss": 0.7898,
+ "slid_loss": 0.7682,
+ "step": 2526,
+ "time": 12.49
+ },
+ {
+ "epoch": 2.43,
+ "learning_rate": "1.6117e-04",
+ "loss": 0.8513,
+ "slid_loss": 0.7694,
+ "step": 2527,
+ "time": 12.47
+ },
+ {
+ "epoch": 2.43,
+ "learning_rate": "1.6114e-04",
+ "loss": 0.7983,
+ "slid_loss": 0.7688,
+ "step": 2528,
+ "time": 11.48
+ },
+ {
+ "epoch": 2.43,
+ "learning_rate": "1.6111e-04",
+ "loss": 0.7522,
+ "slid_loss": 0.7684,
+ "step": 2529,
+ "time": 13.78
+ },
+ {
+ "epoch": 2.43,
+ "learning_rate": "1.6107e-04",
+ "loss": 0.7608,
+ "slid_loss": 0.7689,
+ "step": 2530,
+ "time": 11.59
+ },
+ {
+ "epoch": 2.43,
+ "learning_rate": "1.6104e-04",
+ "loss": 0.7395,
+ "slid_loss": 0.7688,
+ "step": 2531,
+ "time": 13.38
+ },
+ {
+ "epoch": 2.43,
+ "learning_rate": "1.6101e-04",
+ "loss": 0.6857,
+ "slid_loss": 0.7685,
+ "step": 2532,
+ "time": 13.33
+ },
+ {
+ "epoch": 2.43,
+ "learning_rate": "1.6098e-04",
+ "loss": 0.7653,
+ "slid_loss": 0.7687,
+ "step": 2533,
+ "time": 13.58
+ },
+ {
+ "epoch": 2.43,
+ "learning_rate": "1.6094e-04",
+ "loss": 0.7789,
+ "slid_loss": 0.769,
+ "step": 2534,
+ "time": 11.68
+ },
+ {
+ "epoch": 2.44,
+ "learning_rate": "1.6091e-04",
+ "loss": 0.7356,
+ "slid_loss": 0.7687,
+ "step": 2535,
+ "time": 12.18
+ },
+ {
+ "epoch": 2.44,
+ "learning_rate": "1.6088e-04",
+ "loss": 0.8095,
+ "slid_loss": 0.7684,
+ "step": 2536,
+ "time": 13.32
+ },
+ {
+ "epoch": 2.44,
+ "learning_rate": "1.6085e-04",
+ "loss": 0.7227,
+ "slid_loss": 0.7678,
+ "step": 2537,
+ "time": 13.75
+ },
+ {
+ "epoch": 2.44,
+ "learning_rate": "1.6081e-04",
+ "loss": 0.788,
+ "slid_loss": 0.7676,
+ "step": 2538,
+ "time": 13.2
+ },
+ {
+ "epoch": 2.44,
+ "learning_rate": "1.6078e-04",
+ "loss": 0.8287,
+ "slid_loss": 0.7677,
+ "step": 2539,
+ "time": 11.58
+ },
+ {
+ "epoch": 2.44,
+ "learning_rate": "1.6075e-04",
+ "loss": 0.7958,
+ "slid_loss": 0.7677,
+ "step": 2540,
+ "time": 13.51
+ },
+ {
+ "epoch": 2.44,
+ "learning_rate": "1.6071e-04",
+ "loss": 0.7521,
+ "slid_loss": 0.7672,
+ "step": 2541,
+ "time": 12.79
+ },
+ {
+ "epoch": 2.44,
+ "learning_rate": "1.6068e-04",
+ "loss": 0.7179,
+ "slid_loss": 0.7664,
+ "step": 2542,
+ "time": 13.41
+ },
+ {
+ "epoch": 2.44,
+ "learning_rate": "1.6065e-04",
+ "loss": 0.7349,
+ "slid_loss": 0.766,
+ "step": 2543,
+ "time": 13.47
+ },
+ {
+ "epoch": 2.44,
+ "learning_rate": "1.6062e-04",
+ "loss": 0.7754,
+ "slid_loss": 0.7663,
+ "step": 2544,
+ "time": 13.83
+ },
+ {
+ "epoch": 2.44,
+ "learning_rate": "1.6058e-04",
+ "loss": 0.8546,
+ "slid_loss": 0.7676,
+ "step": 2545,
+ "time": 11.85
+ },
+ {
+ "epoch": 2.45,
+ "learning_rate": "1.6055e-04",
+ "loss": 0.802,
+ "slid_loss": 0.7679,
+ "step": 2546,
+ "time": 13.27
+ },
+ {
+ "epoch": 2.45,
+ "learning_rate": "1.6052e-04",
+ "loss": 0.8564,
+ "slid_loss": 0.7684,
+ "step": 2547,
+ "time": 13.3
+ },
+ {
+ "epoch": 2.45,
+ "learning_rate": "1.6049e-04",
+ "loss": 0.8153,
+ "slid_loss": 0.7682,
+ "step": 2548,
+ "time": 12.88
+ },
+ {
+ "epoch": 2.45,
+ "learning_rate": "1.6045e-04",
+ "loss": 0.7352,
+ "slid_loss": 0.7676,
+ "step": 2549,
+ "time": 12.95
+ },
+ {
+ "epoch": 2.45,
+ "learning_rate": "1.6042e-04",
+ "loss": 0.6608,
+ "slid_loss": 0.7663,
+ "step": 2550,
+ "time": 13.3
+ },
+ {
+ "epoch": 2.45,
+ "learning_rate": "1.6039e-04",
+ "loss": 0.7716,
+ "slid_loss": 0.7655,
+ "step": 2551,
+ "time": 12.16
+ },
+ {
+ "epoch": 2.45,
+ "learning_rate": "1.6035e-04",
+ "loss": 0.7292,
+ "slid_loss": 0.7652,
+ "step": 2552,
+ "time": 12.29
+ },
+ {
+ "epoch": 2.45,
+ "learning_rate": "1.6032e-04",
+ "loss": 0.7519,
+ "slid_loss": 0.7648,
+ "step": 2553,
+ "time": 11.67
+ },
+ {
+ "epoch": 2.45,
+ "learning_rate": "1.6029e-04",
+ "loss": 0.7953,
+ "slid_loss": 0.7653,
+ "step": 2554,
+ "time": 14.36
+ },
+ {
+ "epoch": 2.45,
+ "learning_rate": "1.6026e-04",
+ "loss": 0.7437,
+ "slid_loss": 0.765,
+ "step": 2555,
+ "time": 13.53
+ },
+ {
+ "epoch": 2.46,
+ "learning_rate": "1.6022e-04",
+ "loss": 0.6395,
+ "slid_loss": 0.7636,
+ "step": 2556,
+ "time": 12.01
+ },
+ {
+ "epoch": 2.46,
+ "learning_rate": "1.6019e-04",
+ "loss": 0.744,
+ "slid_loss": 0.7639,
+ "step": 2557,
+ "time": 13.27
+ },
+ {
+ "epoch": 2.46,
+ "learning_rate": "1.6016e-04",
+ "loss": 0.7535,
+ "slid_loss": 0.7638,
+ "step": 2558,
+ "time": 11.39
+ },
+ {
+ "epoch": 2.46,
+ "learning_rate": "1.6012e-04",
+ "loss": 0.6509,
+ "slid_loss": 0.7624,
+ "step": 2559,
+ "time": 12.11
+ },
+ {
+ "epoch": 2.46,
+ "learning_rate": "1.6009e-04",
+ "loss": 0.8245,
+ "slid_loss": 0.7626,
+ "step": 2560,
+ "time": 14.74
+ },
+ {
+ "epoch": 2.46,
+ "learning_rate": "1.6006e-04",
+ "loss": 0.7419,
+ "slid_loss": 0.7608,
+ "step": 2561,
+ "time": 13.9
+ },
+ {
+ "epoch": 2.46,
+ "learning_rate": "1.6003e-04",
+ "loss": 0.7167,
+ "slid_loss": 0.7602,
+ "step": 2562,
+ "time": 11.71
+ },
+ {
+ "epoch": 2.46,
+ "learning_rate": "1.5999e-04",
+ "loss": 0.7835,
+ "slid_loss": 0.7616,
+ "step": 2563,
+ "time": 13.64
+ },
+ {
+ "epoch": 2.46,
+ "learning_rate": "1.5996e-04",
+ "loss": 0.8066,
+ "slid_loss": 0.7617,
+ "step": 2564,
+ "time": 13.08
+ },
+ {
+ "epoch": 2.46,
+ "learning_rate": "1.5993e-04",
+ "loss": 0.6595,
+ "slid_loss": 0.7604,
+ "step": 2565,
+ "time": 13.75
+ },
+ {
+ "epoch": 2.46,
+ "learning_rate": "1.5989e-04",
+ "loss": 0.7741,
+ "slid_loss": 0.7603,
+ "step": 2566,
+ "time": 13.6
+ },
+ {
+ "epoch": 2.47,
+ "learning_rate": "1.5986e-04",
+ "loss": 0.7507,
+ "slid_loss": 0.7598,
+ "step": 2567,
+ "time": 13.95
+ },
+ {
+ "epoch": 2.47,
+ "learning_rate": "1.5983e-04",
+ "loss": 0.7065,
+ "slid_loss": 0.7603,
+ "step": 2568,
+ "time": 12.95
+ },
+ {
+ "epoch": 2.47,
+ "learning_rate": "1.5980e-04",
+ "loss": 0.8069,
+ "slid_loss": 0.7606,
+ "step": 2569,
+ "time": 13.77
+ },
+ {
+ "epoch": 2.47,
+ "learning_rate": "1.5976e-04",
+ "loss": 0.717,
+ "slid_loss": 0.7597,
+ "step": 2570,
+ "time": 14.05
+ },
+ {
+ "epoch": 2.47,
+ "learning_rate": "1.5973e-04",
+ "loss": 0.7404,
+ "slid_loss": 0.759,
+ "step": 2571,
+ "time": 14.09
+ },
+ {
+ "epoch": 2.47,
+ "learning_rate": "1.5970e-04",
+ "loss": 0.81,
+ "slid_loss": 0.7597,
+ "step": 2572,
+ "time": 14.25
+ },
+ {
+ "epoch": 2.47,
+ "learning_rate": "1.5966e-04",
+ "loss": 0.816,
+ "slid_loss": 0.7617,
+ "step": 2573,
+ "time": 14.41
+ },
+ {
+ "epoch": 2.47,
+ "learning_rate": "1.5963e-04",
+ "loss": 0.7489,
+ "slid_loss": 0.761,
+ "step": 2574,
+ "time": 13.61
+ },
+ {
+ "epoch": 2.47,
+ "learning_rate": "1.5960e-04",
+ "loss": 0.7789,
+ "slid_loss": 0.7609,
+ "step": 2575,
+ "time": 13.1
+ },
+ {
+ "epoch": 2.47,
+ "learning_rate": "1.5957e-04",
+ "loss": 0.7427,
+ "slid_loss": 0.7613,
+ "step": 2576,
+ "time": 14.12
+ },
+ {
+ "epoch": 2.48,
+ "learning_rate": "1.5953e-04",
+ "loss": 0.7926,
+ "slid_loss": 0.7617,
+ "step": 2577,
+ "time": 13.63
+ },
+ {
+ "epoch": 2.48,
+ "learning_rate": "1.5950e-04",
+ "loss": 0.7035,
+ "slid_loss": 0.7608,
+ "step": 2578,
+ "time": 11.24
+ },
+ {
+ "epoch": 2.48,
+ "learning_rate": "1.5947e-04",
+ "loss": 0.8069,
+ "slid_loss": 0.7624,
+ "step": 2579,
+ "time": 12.09
+ },
+ {
+ "epoch": 2.48,
+ "learning_rate": "1.5943e-04",
+ "loss": 0.6796,
+ "slid_loss": 0.7613,
+ "step": 2580,
+ "time": 13.26
+ },
+ {
+ "epoch": 2.48,
+ "learning_rate": "1.5940e-04",
+ "loss": 0.7261,
+ "slid_loss": 0.7604,
+ "step": 2581,
+ "time": 14.15
+ },
+ {
+ "epoch": 2.48,
+ "learning_rate": "1.5937e-04",
+ "loss": 0.8672,
+ "slid_loss": 0.7601,
+ "step": 2582,
+ "time": 13.69
+ },
+ {
+ "epoch": 2.48,
+ "learning_rate": "1.5933e-04",
+ "loss": 0.7342,
+ "slid_loss": 0.7593,
+ "step": 2583,
+ "time": 11.96
+ },
+ {
+ "epoch": 2.48,
+ "learning_rate": "1.5930e-04",
+ "loss": 0.7754,
+ "slid_loss": 0.7593,
+ "step": 2584,
+ "time": 13.56
+ },
+ {
+ "epoch": 2.48,
+ "learning_rate": "1.5927e-04",
+ "loss": 0.7658,
+ "slid_loss": 0.7589,
+ "step": 2585,
+ "time": 13.11
+ },
+ {
+ "epoch": 2.48,
+ "learning_rate": "1.5924e-04",
+ "loss": 0.7196,
+ "slid_loss": 0.7583,
+ "step": 2586,
+ "time": 12.28
+ },
+ {
+ "epoch": 2.49,
+ "learning_rate": "1.5920e-04",
+ "loss": 0.7761,
+ "slid_loss": 0.7584,
+ "step": 2587,
+ "time": 11.17
+ },
+ {
+ "epoch": 2.49,
+ "learning_rate": "1.5917e-04",
+ "loss": 0.7566,
+ "slid_loss": 0.7575,
+ "step": 2588,
+ "time": 14.45
+ },
+ {
+ "epoch": 2.49,
+ "learning_rate": "1.5914e-04",
+ "loss": 0.6523,
+ "slid_loss": 0.7563,
+ "step": 2589,
+ "time": 12.95
+ },
+ {
+ "epoch": 2.49,
+ "learning_rate": "1.5910e-04",
+ "loss": 0.7621,
+ "slid_loss": 0.7568,
+ "step": 2590,
+ "time": 12.92
+ },
+ {
+ "epoch": 2.49,
+ "learning_rate": "1.5907e-04",
+ "loss": 0.8299,
+ "slid_loss": 0.7564,
+ "step": 2591,
+ "time": 13.4
+ },
+ {
+ "epoch": 2.49,
+ "learning_rate": "1.5904e-04",
+ "loss": 0.7154,
+ "slid_loss": 0.7561,
+ "step": 2592,
+ "time": 14.56
+ },
+ {
+ "epoch": 2.49,
+ "learning_rate": "1.5901e-04",
+ "loss": 0.7038,
+ "slid_loss": 0.7552,
+ "step": 2593,
+ "time": 13.6
+ },
+ {
+ "epoch": 2.49,
+ "learning_rate": "1.5897e-04",
+ "loss": 0.752,
+ "slid_loss": 0.7548,
+ "step": 2594,
+ "time": 12.77
+ },
+ {
+ "epoch": 2.49,
+ "learning_rate": "1.5894e-04",
+ "loss": 0.7786,
+ "slid_loss": 0.755,
+ "step": 2595,
+ "time": 13.68
+ },
+ {
+ "epoch": 2.49,
+ "learning_rate": "1.5891e-04",
+ "loss": 0.7727,
+ "slid_loss": 0.7548,
+ "step": 2596,
+ "time": 11.73
+ },
+ {
+ "epoch": 2.49,
+ "learning_rate": "1.5887e-04",
+ "loss": 0.7248,
+ "slid_loss": 0.7549,
+ "step": 2597,
+ "time": 13.75
+ },
+ {
+ "epoch": 2.5,
+ "learning_rate": "1.5884e-04",
+ "loss": 0.7293,
+ "slid_loss": 0.7538,
+ "step": 2598,
+ "time": 13.57
+ },
+ {
+ "epoch": 2.5,
+ "learning_rate": "1.5881e-04",
+ "loss": 0.7143,
+ "slid_loss": 0.7527,
+ "step": 2599,
+ "time": 13.44
+ },
+ {
+ "epoch": 2.5,
+ "learning_rate": "1.5877e-04",
+ "loss": 0.8009,
+ "slid_loss": 0.7527,
+ "step": 2600,
+ "time": 12.79
+ },
+ {
+ "epoch": 2.5,
+ "learning_rate": "1.5874e-04",
+ "loss": 0.7705,
+ "slid_loss": 0.7534,
+ "step": 2601,
+ "time": 12.02
+ },
+ {
+ "epoch": 2.5,
+ "learning_rate": "1.5871e-04",
+ "loss": 0.7312,
+ "slid_loss": 0.7529,
+ "step": 2602,
+ "time": 12.34
+ },
+ {
+ "epoch": 2.5,
+ "learning_rate": "1.5868e-04",
+ "loss": 0.7906,
+ "slid_loss": 0.7535,
+ "step": 2603,
+ "time": 13.32
+ },
+ {
+ "epoch": 2.5,
+ "learning_rate": "1.5864e-04",
+ "loss": 0.7094,
+ "slid_loss": 0.7524,
+ "step": 2604,
+ "time": 12.95
+ },
+ {
+ "epoch": 2.5,
+ "learning_rate": "1.5861e-04",
+ "loss": 0.8538,
+ "slid_loss": 0.7543,
+ "step": 2605,
+ "time": 12.99
+ },
+ {
+ "epoch": 2.5,
+ "learning_rate": "1.5858e-04",
+ "loss": 0.7116,
+ "slid_loss": 0.754,
+ "step": 2606,
+ "time": 11.37
+ },
+ {
+ "epoch": 2.5,
+ "learning_rate": "1.5854e-04",
+ "loss": 0.7694,
+ "slid_loss": 0.7551,
+ "step": 2607,
+ "time": 12.83
+ },
+ {
+ "epoch": 2.51,
+ "learning_rate": "1.5851e-04",
+ "loss": 0.765,
+ "slid_loss": 0.7555,
+ "step": 2608,
+ "time": 12.34
+ },
+ {
+ "epoch": 2.51,
+ "learning_rate": "1.5848e-04",
+ "loss": 0.7744,
+ "slid_loss": 0.7554,
+ "step": 2609,
+ "time": 11.05
+ },
+ {
+ "epoch": 2.51,
+ "learning_rate": "1.5844e-04",
+ "loss": 0.7855,
+ "slid_loss": 0.7557,
+ "step": 2610,
+ "time": 12.87
+ },
+ {
+ "epoch": 2.51,
+ "learning_rate": "1.5841e-04",
+ "loss": 0.6962,
+ "slid_loss": 0.7556,
+ "step": 2611,
+ "time": 12.62
+ },
+ {
+ "epoch": 2.51,
+ "learning_rate": "1.5838e-04",
+ "loss": 0.7646,
+ "slid_loss": 0.7555,
+ "step": 2612,
+ "time": 13.6
+ },
+ {
+ "epoch": 2.51,
+ "learning_rate": "1.5834e-04",
+ "loss": 0.8084,
+ "slid_loss": 0.7568,
+ "step": 2613,
+ "time": 13.52
+ },
+ {
+ "epoch": 2.51,
+ "learning_rate": "1.5831e-04",
+ "loss": 0.7533,
+ "slid_loss": 0.7572,
+ "step": 2614,
+ "time": 13.19
+ },
+ {
+ "epoch": 2.51,
+ "learning_rate": "1.5828e-04",
+ "loss": 0.7667,
+ "slid_loss": 0.7574,
+ "step": 2615,
+ "time": 13.25
+ },
+ {
+ "epoch": 2.51,
+ "learning_rate": "1.5825e-04",
+ "loss": 0.6648,
+ "slid_loss": 0.7572,
+ "step": 2616,
+ "time": 12.96
+ },
+ {
+ "epoch": 2.51,
+ "learning_rate": "1.5821e-04",
+ "loss": 0.6563,
+ "slid_loss": 0.756,
+ "step": 2617,
+ "time": 12.97
+ },
+ {
+ "epoch": 2.51,
+ "learning_rate": "1.5818e-04",
+ "loss": 0.6764,
+ "slid_loss": 0.7557,
+ "step": 2618,
+ "time": 11.88
+ },
+ {
+ "epoch": 2.52,
+ "learning_rate": "1.5815e-04",
+ "loss": 0.798,
+ "slid_loss": 0.7566,
+ "step": 2619,
+ "time": 13.25
+ },
+ {
+ "epoch": 2.52,
+ "learning_rate": "1.5811e-04",
+ "loss": 0.8125,
+ "slid_loss": 0.7569,
+ "step": 2620,
+ "time": 13.33
+ },
+ {
+ "epoch": 2.52,
+ "learning_rate": "1.5808e-04",
+ "loss": 0.8022,
+ "slid_loss": 0.7575,
+ "step": 2621,
+ "time": 13.69
+ },
+ {
+ "epoch": 2.52,
+ "learning_rate": "1.5805e-04",
+ "loss": 0.7877,
+ "slid_loss": 0.7574,
+ "step": 2622,
+ "time": 13.84
+ },
+ {
+ "epoch": 2.52,
+ "learning_rate": "1.5801e-04",
+ "loss": 0.843,
+ "slid_loss": 0.7578,
+ "step": 2623,
+ "time": 12.85
+ },
+ {
+ "epoch": 2.52,
+ "learning_rate": "1.5798e-04",
+ "loss": 0.6315,
+ "slid_loss": 0.7569,
+ "step": 2624,
+ "time": 14.01
+ },
+ {
+ "epoch": 2.52,
+ "learning_rate": "1.5795e-04",
+ "loss": 0.734,
+ "slid_loss": 0.7571,
+ "step": 2625,
+ "time": 13.53
+ },
+ {
+ "epoch": 2.52,
+ "learning_rate": "1.5791e-04",
+ "loss": 0.7069,
+ "slid_loss": 0.7563,
+ "step": 2626,
+ "time": 12.75
+ },
+ {
+ "epoch": 2.52,
+ "learning_rate": "1.5788e-04",
+ "loss": 0.7965,
+ "slid_loss": 0.7557,
+ "step": 2627,
+ "time": 12.39
+ },
+ {
+ "epoch": 2.52,
+ "learning_rate": "1.5785e-04",
+ "loss": 0.6697,
+ "slid_loss": 0.7544,
+ "step": 2628,
+ "time": 13.87
+ },
+ {
+ "epoch": 2.53,
+ "learning_rate": "1.5782e-04",
+ "loss": 0.6838,
+ "slid_loss": 0.7537,
+ "step": 2629,
+ "time": 13.07
+ },
+ {
+ "epoch": 2.53,
+ "learning_rate": "1.5778e-04",
+ "loss": 0.8238,
+ "slid_loss": 0.7544,
+ "step": 2630,
+ "time": 13.42
+ },
+ {
+ "epoch": 2.53,
+ "learning_rate": "1.5775e-04",
+ "loss": 0.7702,
+ "slid_loss": 0.7547,
+ "step": 2631,
+ "time": 13.29
+ },
+ {
+ "epoch": 2.53,
+ "learning_rate": "1.5772e-04",
+ "loss": 0.8191,
+ "slid_loss": 0.756,
+ "step": 2632,
+ "time": 13.01
+ },
+ {
+ "epoch": 2.53,
+ "learning_rate": "1.5768e-04",
+ "loss": 0.7865,
+ "slid_loss": 0.7562,
+ "step": 2633,
+ "time": 13.83
+ },
+ {
+ "epoch": 2.53,
+ "learning_rate": "1.5765e-04",
+ "loss": 0.7266,
+ "slid_loss": 0.7557,
+ "step": 2634,
+ "time": 11.02
+ },
+ {
+ "epoch": 2.53,
+ "learning_rate": "1.5762e-04",
+ "loss": 0.7764,
+ "slid_loss": 0.7561,
+ "step": 2635,
+ "time": 13.73
+ },
+ {
+ "epoch": 2.53,
+ "learning_rate": "1.5758e-04",
+ "loss": 0.7986,
+ "slid_loss": 0.756,
+ "step": 2636,
+ "time": 13.77
+ },
+ {
+ "epoch": 2.53,
+ "learning_rate": "1.5755e-04",
+ "loss": 0.6551,
+ "slid_loss": 0.7553,
+ "step": 2637,
+ "time": 13.48
+ },
+ {
+ "epoch": 2.53,
+ "learning_rate": "1.5752e-04",
+ "loss": 0.7846,
+ "slid_loss": 0.7553,
+ "step": 2638,
+ "time": 14.18
+ },
+ {
+ "epoch": 2.54,
+ "learning_rate": "1.5748e-04",
+ "loss": 0.6623,
+ "slid_loss": 0.7536,
+ "step": 2639,
+ "time": 13.57
+ },
+ {
+ "epoch": 2.54,
+ "learning_rate": "1.5745e-04",
+ "loss": 0.8719,
+ "slid_loss": 0.7544,
+ "step": 2640,
+ "time": 13.44
+ },
+ {
+ "epoch": 2.54,
+ "learning_rate": "1.5742e-04",
+ "loss": 0.7279,
+ "slid_loss": 0.7541,
+ "step": 2641,
+ "time": 13.36
+ },
+ {
+ "epoch": 2.54,
+ "learning_rate": "1.5738e-04",
+ "loss": 0.719,
+ "slid_loss": 0.7541,
+ "step": 2642,
+ "time": 13.82
+ },
+ {
+ "epoch": 2.54,
+ "learning_rate": "1.5735e-04",
+ "loss": 0.7999,
+ "slid_loss": 0.7548,
+ "step": 2643,
+ "time": 12.75
+ },
+ {
+ "epoch": 2.54,
+ "learning_rate": "1.5732e-04",
+ "loss": 0.7951,
+ "slid_loss": 0.755,
+ "step": 2644,
+ "time": 14.26
+ },
+ {
+ "epoch": 2.54,
+ "learning_rate": "1.5728e-04",
+ "loss": 0.737,
+ "slid_loss": 0.7538,
+ "step": 2645,
+ "time": 13.46
+ },
+ {
+ "epoch": 2.54,
+ "learning_rate": "1.5725e-04",
+ "loss": 0.7354,
+ "slid_loss": 0.7531,
+ "step": 2646,
+ "time": 12.83
+ },
+ {
+ "epoch": 2.54,
+ "learning_rate": "1.5722e-04",
+ "loss": 0.6958,
+ "slid_loss": 0.7515,
+ "step": 2647,
+ "time": 14.17
+ },
+ {
+ "epoch": 2.54,
+ "learning_rate": "1.5719e-04",
+ "loss": 0.8315,
+ "slid_loss": 0.7517,
+ "step": 2648,
+ "time": 11.62
+ },
+ {
+ "epoch": 2.54,
+ "learning_rate": "1.5715e-04",
+ "loss": 0.7353,
+ "slid_loss": 0.7517,
+ "step": 2649,
+ "time": 13.43
+ },
+ {
+ "epoch": 2.55,
+ "learning_rate": "1.5712e-04",
+ "loss": 0.7678,
+ "slid_loss": 0.7528,
+ "step": 2650,
+ "time": 13.96
+ },
+ {
+ "epoch": 2.55,
+ "learning_rate": "1.5709e-04",
+ "loss": 0.7072,
+ "slid_loss": 0.7521,
+ "step": 2651,
+ "time": 14.27
+ },
+ {
+ "epoch": 2.55,
+ "learning_rate": "1.5705e-04",
+ "loss": 0.7597,
+ "slid_loss": 0.7524,
+ "step": 2652,
+ "time": 12.73
+ },
+ {
+ "epoch": 2.55,
+ "learning_rate": "1.5702e-04",
+ "loss": 0.6534,
+ "slid_loss": 0.7515,
+ "step": 2653,
+ "time": 12.77
+ },
+ {
+ "epoch": 2.55,
+ "learning_rate": "1.5699e-04",
+ "loss": 0.7455,
+ "slid_loss": 0.751,
+ "step": 2654,
+ "time": 13.33
+ },
+ {
+ "epoch": 2.55,
+ "learning_rate": "1.5695e-04",
+ "loss": 0.7421,
+ "slid_loss": 0.7509,
+ "step": 2655,
+ "time": 12.66
+ },
+ {
+ "epoch": 2.55,
+ "learning_rate": "1.5692e-04",
+ "loss": 0.7313,
+ "slid_loss": 0.7519,
+ "step": 2656,
+ "time": 13.32
+ },
+ {
+ "epoch": 2.55,
+ "learning_rate": "1.5689e-04",
+ "loss": 0.6588,
+ "slid_loss": 0.751,
+ "step": 2657,
+ "time": 11.37
+ },
+ {
+ "epoch": 2.55,
+ "learning_rate": "1.5685e-04",
+ "loss": 0.7248,
+ "slid_loss": 0.7507,
+ "step": 2658,
+ "time": 12.93
+ },
+ {
+ "epoch": 2.55,
+ "learning_rate": "1.5682e-04",
+ "loss": 0.8126,
+ "slid_loss": 0.7523,
+ "step": 2659,
+ "time": 13.05
+ },
+ {
+ "epoch": 2.56,
+ "learning_rate": "1.5679e-04",
+ "loss": 0.5946,
+ "slid_loss": 0.75,
+ "step": 2660,
+ "time": 14.45
+ },
+ {
+ "epoch": 2.56,
+ "learning_rate": "1.5675e-04",
+ "loss": 0.7433,
+ "slid_loss": 0.75,
+ "step": 2661,
+ "time": 13.63
+ },
+ {
+ "epoch": 2.56,
+ "learning_rate": "1.5672e-04",
+ "loss": 0.7957,
+ "slid_loss": 0.7508,
+ "step": 2662,
+ "time": 13.54
+ },
+ {
+ "epoch": 2.56,
+ "learning_rate": "1.5669e-04",
+ "loss": 0.7429,
+ "slid_loss": 0.7504,
+ "step": 2663,
+ "time": 11.41
+ },
+ {
+ "epoch": 2.56,
+ "learning_rate": "1.5665e-04",
+ "loss": 0.7322,
+ "slid_loss": 0.7497,
+ "step": 2664,
+ "time": 13.21
+ },
+ {
+ "epoch": 2.56,
+ "learning_rate": "1.5662e-04",
+ "loss": 0.7457,
+ "slid_loss": 0.7505,
+ "step": 2665,
+ "time": 13.63
+ },
+ {
+ "epoch": 2.56,
+ "learning_rate": "1.5659e-04",
+ "loss": 0.74,
+ "slid_loss": 0.7502,
+ "step": 2666,
+ "time": 12.84
+ },
+ {
+ "epoch": 2.56,
+ "learning_rate": "1.5655e-04",
+ "loss": 0.8403,
+ "slid_loss": 0.7511,
+ "step": 2667,
+ "time": 13.0
+ },
+ {
+ "epoch": 2.56,
+ "learning_rate": "1.5652e-04",
+ "loss": 0.6667,
+ "slid_loss": 0.7507,
+ "step": 2668,
+ "time": 11.65
+ },
+ {
+ "epoch": 2.56,
+ "learning_rate": "1.5649e-04",
+ "loss": 0.7343,
+ "slid_loss": 0.75,
+ "step": 2669,
+ "time": 11.75
+ },
+ {
+ "epoch": 2.56,
+ "learning_rate": "1.5645e-04",
+ "loss": 0.7765,
+ "slid_loss": 0.7506,
+ "step": 2670,
+ "time": 12.88
+ },
+ {
+ "epoch": 2.57,
+ "learning_rate": "1.5642e-04",
+ "loss": 0.6627,
+ "slid_loss": 0.7498,
+ "step": 2671,
+ "time": 13.38
+ },
+ {
+ "epoch": 2.57,
+ "learning_rate": "1.5639e-04",
+ "loss": 0.7361,
+ "slid_loss": 0.7491,
+ "step": 2672,
+ "time": 12.75
+ },
+ {
+ "epoch": 2.57,
+ "learning_rate": "1.5635e-04",
+ "loss": 0.7962,
+ "slid_loss": 0.7489,
+ "step": 2673,
+ "time": 13.2
+ },
+ {
+ "epoch": 2.57,
+ "learning_rate": "1.5632e-04",
+ "loss": 0.7437,
+ "slid_loss": 0.7488,
+ "step": 2674,
+ "time": 12.32
+ },
+ {
+ "epoch": 2.57,
+ "learning_rate": "1.5629e-04",
+ "loss": 0.7281,
+ "slid_loss": 0.7483,
+ "step": 2675,
+ "time": 11.29
+ },
+ {
+ "epoch": 2.57,
+ "learning_rate": "1.5625e-04",
+ "loss": 0.8678,
+ "slid_loss": 0.7496,
+ "step": 2676,
+ "time": 13.77
+ },
+ {
+ "epoch": 2.57,
+ "learning_rate": "1.5622e-04",
+ "loss": 0.7707,
+ "slid_loss": 0.7493,
+ "step": 2677,
+ "time": 13.75
+ },
+ {
+ "epoch": 2.57,
+ "learning_rate": "1.5619e-04",
+ "loss": 0.904,
+ "slid_loss": 0.7513,
+ "step": 2678,
+ "time": 13.39
+ },
+ {
+ "epoch": 2.57,
+ "learning_rate": "1.5615e-04",
+ "loss": 0.7357,
+ "slid_loss": 0.7506,
+ "step": 2679,
+ "time": 13.5
+ },
+ {
+ "epoch": 2.57,
+ "learning_rate": "1.5612e-04",
+ "loss": 0.686,
+ "slid_loss": 0.7507,
+ "step": 2680,
+ "time": 13.65
+ },
+ {
+ "epoch": 2.58,
+ "learning_rate": "1.5609e-04",
+ "loss": 0.7255,
+ "slid_loss": 0.7507,
+ "step": 2681,
+ "time": 13.27
+ },
+ {
+ "epoch": 2.58,
+ "learning_rate": "1.5606e-04",
+ "loss": 0.7308,
+ "slid_loss": 0.7493,
+ "step": 2682,
+ "time": 12.14
+ },
+ {
+ "epoch": 2.58,
+ "learning_rate": "1.5602e-04",
+ "loss": 0.7224,
+ "slid_loss": 0.7492,
+ "step": 2683,
+ "time": 13.42
+ },
+ {
+ "epoch": 2.58,
+ "learning_rate": "1.5599e-04",
+ "loss": 0.6316,
+ "slid_loss": 0.7478,
+ "step": 2684,
+ "time": 13.52
+ },
+ {
+ "epoch": 2.58,
+ "learning_rate": "1.5596e-04",
+ "loss": 0.677,
+ "slid_loss": 0.7469,
+ "step": 2685,
+ "time": 13.12
+ },
+ {
+ "epoch": 2.58,
+ "learning_rate": "1.5592e-04",
+ "loss": 0.8373,
+ "slid_loss": 0.7481,
+ "step": 2686,
+ "time": 12.17
+ },
+ {
+ "epoch": 2.58,
+ "learning_rate": "1.5589e-04",
+ "loss": 0.6994,
+ "slid_loss": 0.7473,
+ "step": 2687,
+ "time": 13.72
+ },
+ {
+ "epoch": 2.58,
+ "learning_rate": "1.5586e-04",
+ "loss": 0.6977,
+ "slid_loss": 0.7467,
+ "step": 2688,
+ "time": 12.97
+ },
+ {
+ "epoch": 2.58,
+ "learning_rate": "1.5582e-04",
+ "loss": 0.7113,
+ "slid_loss": 0.7473,
+ "step": 2689,
+ "time": 11.48
+ },
+ {
+ "epoch": 2.58,
+ "learning_rate": "1.5579e-04",
+ "loss": 0.677,
+ "slid_loss": 0.7464,
+ "step": 2690,
+ "time": 13.25
+ },
+ {
+ "epoch": 2.59,
+ "learning_rate": "1.5576e-04",
+ "loss": 0.7765,
+ "slid_loss": 0.7459,
+ "step": 2691,
+ "time": 13.79
+ },
+ {
+ "epoch": 2.59,
+ "learning_rate": "1.5572e-04",
+ "loss": 0.7437,
+ "slid_loss": 0.7462,
+ "step": 2692,
+ "time": 13.98
+ },
+ {
+ "epoch": 2.59,
+ "learning_rate": "1.5569e-04",
+ "loss": 0.7709,
+ "slid_loss": 0.7469,
+ "step": 2693,
+ "time": 13.85
+ },
+ {
+ "epoch": 2.59,
+ "learning_rate": "1.5566e-04",
+ "loss": 0.6544,
+ "slid_loss": 0.7459,
+ "step": 2694,
+ "time": 11.57
+ },
+ {
+ "epoch": 2.59,
+ "learning_rate": "1.5562e-04",
+ "loss": 0.6891,
+ "slid_loss": 0.745,
+ "step": 2695,
+ "time": 12.94
+ },
+ {
+ "epoch": 2.59,
+ "learning_rate": "1.5559e-04",
+ "loss": 0.7832,
+ "slid_loss": 0.7451,
+ "step": 2696,
+ "time": 13.12
+ },
+ {
+ "epoch": 2.59,
+ "learning_rate": "1.5556e-04",
+ "loss": 0.6314,
+ "slid_loss": 0.7442,
+ "step": 2697,
+ "time": 11.83
+ },
+ {
+ "epoch": 2.59,
+ "learning_rate": "1.5552e-04",
+ "loss": 0.6901,
+ "slid_loss": 0.7438,
+ "step": 2698,
+ "time": 12.82
+ },
+ {
+ "epoch": 2.59,
+ "learning_rate": "1.5549e-04",
+ "loss": 0.7755,
+ "slid_loss": 0.7444,
+ "step": 2699,
+ "time": 13.69
+ },
+ {
+ "epoch": 2.59,
+ "learning_rate": "1.5546e-04",
+ "loss": 0.6637,
+ "slid_loss": 0.743,
+ "step": 2700,
+ "time": 13.17
+ },
+ {
+ "epoch": 2.59,
+ "learning_rate": "1.5542e-04",
+ "loss": 0.8759,
+ "slid_loss": 0.7441,
+ "step": 2701,
+ "time": 13.27
+ },
+ {
+ "epoch": 2.6,
+ "learning_rate": "1.5539e-04",
+ "loss": 0.6961,
+ "slid_loss": 0.7437,
+ "step": 2702,
+ "time": 12.88
+ },
+ {
+ "epoch": 2.6,
+ "learning_rate": "1.5536e-04",
+ "loss": 0.8323,
+ "slid_loss": 0.7441,
+ "step": 2703,
+ "time": 12.97
+ },
+ {
+ "epoch": 2.6,
+ "learning_rate": "1.5532e-04",
+ "loss": 0.7245,
+ "slid_loss": 0.7443,
+ "step": 2704,
+ "time": 12.83
+ },
+ {
+ "epoch": 2.6,
+ "learning_rate": "1.5529e-04",
+ "loss": 0.7406,
+ "slid_loss": 0.7431,
+ "step": 2705,
+ "time": 11.41
+ },
+ {
+ "epoch": 2.6,
+ "learning_rate": "1.5526e-04",
+ "loss": 0.6497,
+ "slid_loss": 0.7425,
+ "step": 2706,
+ "time": 12.99
+ },
+ {
+ "epoch": 2.6,
+ "learning_rate": "1.5522e-04",
+ "loss": 0.7763,
+ "slid_loss": 0.7426,
+ "step": 2707,
+ "time": 13.22
+ },
+ {
+ "epoch": 2.6,
+ "learning_rate": "1.5519e-04",
+ "loss": 0.8114,
+ "slid_loss": 0.7431,
+ "step": 2708,
+ "time": 13.42
+ },
+ {
+ "epoch": 2.6,
+ "learning_rate": "1.5516e-04",
+ "loss": 0.7379,
+ "slid_loss": 0.7427,
+ "step": 2709,
+ "time": 13.53
+ },
+ {
+ "epoch": 2.6,
+ "learning_rate": "1.5512e-04",
+ "loss": 0.8181,
+ "slid_loss": 0.743,
+ "step": 2710,
+ "time": 10.93
+ },
+ {
+ "epoch": 2.6,
+ "learning_rate": "1.5509e-04",
+ "loss": 0.6495,
+ "slid_loss": 0.7426,
+ "step": 2711,
+ "time": 13.27
+ },
+ {
+ "epoch": 2.61,
+ "learning_rate": "1.5506e-04",
+ "loss": 0.8279,
+ "slid_loss": 0.7432,
+ "step": 2712,
+ "time": 13.43
+ },
+ {
+ "epoch": 2.61,
+ "learning_rate": "1.5502e-04",
+ "loss": 0.7724,
+ "slid_loss": 0.7428,
+ "step": 2713,
+ "time": 13.7
+ },
+ {
+ "epoch": 2.61,
+ "learning_rate": "1.5499e-04",
+ "loss": 0.6782,
+ "slid_loss": 0.7421,
+ "step": 2714,
+ "time": 11.71
+ },
+ {
+ "epoch": 2.61,
+ "learning_rate": "1.5496e-04",
+ "loss": 0.7126,
+ "slid_loss": 0.7415,
+ "step": 2715,
+ "time": 11.73
+ },
+ {
+ "epoch": 2.61,
+ "learning_rate": "1.5492e-04",
+ "loss": 0.6476,
+ "slid_loss": 0.7414,
+ "step": 2716,
+ "time": 13.64
+ },
+ {
+ "epoch": 2.61,
+ "learning_rate": "1.5489e-04",
+ "loss": 0.6864,
+ "slid_loss": 0.7417,
+ "step": 2717,
+ "time": 13.59
+ },
+ {
+ "epoch": 2.61,
+ "learning_rate": "1.5485e-04",
+ "loss": 0.7045,
+ "slid_loss": 0.7419,
+ "step": 2718,
+ "time": 13.48
+ },
+ {
+ "epoch": 2.61,
+ "learning_rate": "1.5482e-04",
+ "loss": 0.6649,
+ "slid_loss": 0.7406,
+ "step": 2719,
+ "time": 14.0
+ },
+ {
+ "epoch": 2.61,
+ "learning_rate": "1.5479e-04",
+ "loss": 0.6917,
+ "slid_loss": 0.7394,
+ "step": 2720,
+ "time": 12.27
+ },
+ {
+ "epoch": 2.61,
+ "learning_rate": "1.5475e-04",
+ "loss": 0.7116,
+ "slid_loss": 0.7385,
+ "step": 2721,
+ "time": 14.17
+ },
+ {
+ "epoch": 2.61,
+ "learning_rate": "1.5472e-04",
+ "loss": 0.7193,
+ "slid_loss": 0.7378,
+ "step": 2722,
+ "time": 13.13
+ },
+ {
+ "epoch": 2.62,
+ "learning_rate": "1.5469e-04",
+ "loss": 0.7463,
+ "slid_loss": 0.7369,
+ "step": 2723,
+ "time": 13.46
+ },
+ {
+ "epoch": 2.62,
+ "learning_rate": "1.5465e-04",
+ "loss": 0.7429,
+ "slid_loss": 0.738,
+ "step": 2724,
+ "time": 13.53
+ },
+ {
+ "epoch": 2.62,
+ "learning_rate": "1.5462e-04",
+ "loss": 0.6853,
+ "slid_loss": 0.7375,
+ "step": 2725,
+ "time": 13.25
+ },
+ {
+ "epoch": 2.62,
+ "learning_rate": "1.5459e-04",
+ "loss": 0.8209,
+ "slid_loss": 0.7386,
+ "step": 2726,
+ "time": 13.9
+ },
+ {
+ "epoch": 2.62,
+ "learning_rate": "1.5455e-04",
+ "loss": 0.7261,
+ "slid_loss": 0.7379,
+ "step": 2727,
+ "time": 13.43
+ },
+ {
+ "epoch": 2.62,
+ "learning_rate": "1.5452e-04",
+ "loss": 0.6481,
+ "slid_loss": 0.7377,
+ "step": 2728,
+ "time": 13.54
+ },
+ {
+ "epoch": 2.62,
+ "learning_rate": "1.5449e-04",
+ "loss": 0.8078,
+ "slid_loss": 0.7389,
+ "step": 2729,
+ "time": 14.13
+ },
+ {
+ "epoch": 2.62,
+ "learning_rate": "1.5445e-04",
+ "loss": 0.6865,
+ "slid_loss": 0.7376,
+ "step": 2730,
+ "time": 13.55
+ },
+ {
+ "epoch": 2.62,
+ "learning_rate": "1.5442e-04",
+ "loss": 0.6555,
+ "slid_loss": 0.7364,
+ "step": 2731,
+ "time": 13.28
+ },
+ {
+ "epoch": 2.62,
+ "learning_rate": "1.5439e-04",
+ "loss": 0.6904,
+ "slid_loss": 0.7351,
+ "step": 2732,
+ "time": 13.79
+ },
+ {
+ "epoch": 2.63,
+ "learning_rate": "1.5435e-04",
+ "loss": 0.7761,
+ "slid_loss": 0.735,
+ "step": 2733,
+ "time": 13.6
+ },
+ {
+ "epoch": 2.63,
+ "learning_rate": "1.5432e-04",
+ "loss": 0.7068,
+ "slid_loss": 0.7348,
+ "step": 2734,
+ "time": 12.99
+ },
+ {
+ "epoch": 2.63,
+ "learning_rate": "1.5429e-04",
+ "loss": 0.6965,
+ "slid_loss": 0.734,
+ "step": 2735,
+ "time": 11.67
+ },
+ {
+ "epoch": 2.63,
+ "learning_rate": "1.5425e-04",
+ "loss": 0.8568,
+ "slid_loss": 0.7346,
+ "step": 2736,
+ "time": 11.45
+ },
+ {
+ "epoch": 2.63,
+ "learning_rate": "1.5422e-04",
+ "loss": 0.8005,
+ "slid_loss": 0.7361,
+ "step": 2737,
+ "time": 13.22
+ },
+ {
+ "epoch": 2.63,
+ "learning_rate": "1.5419e-04",
+ "loss": 0.7905,
+ "slid_loss": 0.7361,
+ "step": 2738,
+ "time": 14.06
+ },
+ {
+ "epoch": 2.63,
+ "learning_rate": "1.5415e-04",
+ "loss": 0.6963,
+ "slid_loss": 0.7365,
+ "step": 2739,
+ "time": 13.65
+ },
+ {
+ "epoch": 2.63,
+ "learning_rate": "1.5412e-04",
+ "loss": 0.6523,
+ "slid_loss": 0.7343,
+ "step": 2740,
+ "time": 13.29
+ },
+ {
+ "epoch": 2.63,
+ "learning_rate": "1.5409e-04",
+ "loss": 0.8105,
+ "slid_loss": 0.7351,
+ "step": 2741,
+ "time": 13.33
+ },
+ {
+ "epoch": 2.63,
+ "learning_rate": "1.5405e-04",
+ "loss": 0.7098,
+ "slid_loss": 0.735,
+ "step": 2742,
+ "time": 13.77
+ },
+ {
+ "epoch": 2.63,
+ "learning_rate": "1.5402e-04",
+ "loss": 0.7424,
+ "slid_loss": 0.7344,
+ "step": 2743,
+ "time": 11.6
+ },
+ {
+ "epoch": 2.64,
+ "learning_rate": "1.5399e-04",
+ "loss": 0.8228,
+ "slid_loss": 0.7347,
+ "step": 2744,
+ "time": 12.91
+ },
+ {
+ "epoch": 2.64,
+ "learning_rate": "1.5395e-04",
+ "loss": 0.7302,
+ "slid_loss": 0.7346,
+ "step": 2745,
+ "time": 13.34
+ },
+ {
+ "epoch": 2.64,
+ "learning_rate": "1.5392e-04",
+ "loss": 0.7406,
+ "slid_loss": 0.7347,
+ "step": 2746,
+ "time": 13.81
+ },
+ {
+ "epoch": 2.64,
+ "learning_rate": "1.5389e-04",
+ "loss": 0.7007,
+ "slid_loss": 0.7347,
+ "step": 2747,
+ "time": 13.4
+ },
+ {
+ "epoch": 2.64,
+ "learning_rate": "1.5385e-04",
+ "loss": 0.6864,
+ "slid_loss": 0.7333,
+ "step": 2748,
+ "time": 13.65
+ },
+ {
+ "epoch": 2.64,
+ "learning_rate": "1.5382e-04",
+ "loss": 0.8617,
+ "slid_loss": 0.7346,
+ "step": 2749,
+ "time": 12.96
+ },
+ {
+ "epoch": 2.64,
+ "learning_rate": "1.5379e-04",
+ "loss": 0.7864,
+ "slid_loss": 0.7347,
+ "step": 2750,
+ "time": 13.68
+ },
+ {
+ "epoch": 2.64,
+ "learning_rate": "1.5375e-04",
+ "loss": 0.7584,
+ "slid_loss": 0.7353,
+ "step": 2751,
+ "time": 11.16
+ },
+ {
+ "epoch": 2.64,
+ "learning_rate": "1.5372e-04",
+ "loss": 0.7596,
+ "slid_loss": 0.7353,
+ "step": 2752,
+ "time": 14.07
+ },
+ {
+ "epoch": 2.64,
+ "learning_rate": "1.5369e-04",
+ "loss": 0.7005,
+ "slid_loss": 0.7357,
+ "step": 2753,
+ "time": 13.4
+ },
+ {
+ "epoch": 2.65,
+ "learning_rate": "1.5365e-04",
+ "loss": 0.8186,
+ "slid_loss": 0.7365,
+ "step": 2754,
+ "time": 13.67
+ },
+ {
+ "epoch": 2.65,
+ "learning_rate": "1.5362e-04",
+ "loss": 0.6628,
+ "slid_loss": 0.7357,
+ "step": 2755,
+ "time": 13.37
+ },
+ {
+ "epoch": 2.65,
+ "learning_rate": "1.5359e-04",
+ "loss": 0.7217,
+ "slid_loss": 0.7356,
+ "step": 2756,
+ "time": 13.34
+ },
+ {
+ "epoch": 2.65,
+ "learning_rate": "1.5355e-04",
+ "loss": 0.7855,
+ "slid_loss": 0.7368,
+ "step": 2757,
+ "time": 11.57
+ },
+ {
+ "epoch": 2.65,
+ "learning_rate": "1.5352e-04",
+ "loss": 0.7084,
+ "slid_loss": 0.7367,
+ "step": 2758,
+ "time": 13.37
+ },
+ {
+ "epoch": 2.65,
+ "learning_rate": "1.5348e-04",
+ "loss": 0.709,
+ "slid_loss": 0.7356,
+ "step": 2759,
+ "time": 13.83
+ },
+ {
+ "epoch": 2.65,
+ "learning_rate": "1.5345e-04",
+ "loss": 0.6412,
+ "slid_loss": 0.7361,
+ "step": 2760,
+ "time": 13.37
+ },
+ {
+ "epoch": 2.65,
+ "learning_rate": "1.5342e-04",
+ "loss": 0.7029,
+ "slid_loss": 0.7357,
+ "step": 2761,
+ "time": 11.96
+ },
+ {
+ "epoch": 2.65,
+ "learning_rate": "1.5338e-04",
+ "loss": 0.7364,
+ "slid_loss": 0.7351,
+ "step": 2762,
+ "time": 13.85
+ },
+ {
+ "epoch": 2.65,
+ "learning_rate": "1.5335e-04",
+ "loss": 0.7596,
+ "slid_loss": 0.7353,
+ "step": 2763,
+ "time": 12.19
+ },
+ {
+ "epoch": 2.66,
+ "learning_rate": "1.5332e-04",
+ "loss": 0.8489,
+ "slid_loss": 0.7364,
+ "step": 2764,
+ "time": 13.46
+ },
+ {
+ "epoch": 2.66,
+ "learning_rate": "1.5328e-04",
+ "loss": 0.7683,
+ "slid_loss": 0.7367,
+ "step": 2765,
+ "time": 11.91
+ },
+ {
+ "epoch": 2.66,
+ "learning_rate": "1.5325e-04",
+ "loss": 0.742,
+ "slid_loss": 0.7367,
+ "step": 2766,
+ "time": 12.6
+ },
+ {
+ "epoch": 2.66,
+ "learning_rate": "1.5322e-04",
+ "loss": 0.7009,
+ "slid_loss": 0.7353,
+ "step": 2767,
+ "time": 13.71
+ },
+ {
+ "epoch": 2.66,
+ "learning_rate": "1.5318e-04",
+ "loss": 0.6136,
+ "slid_loss": 0.7348,
+ "step": 2768,
+ "time": 13.55
+ },
+ {
+ "epoch": 2.66,
+ "learning_rate": "1.5315e-04",
+ "loss": 0.7672,
+ "slid_loss": 0.7351,
+ "step": 2769,
+ "time": 13.58
+ },
+ {
+ "epoch": 2.66,
+ "learning_rate": "1.5312e-04",
+ "loss": 0.7279,
+ "slid_loss": 0.7346,
+ "step": 2770,
+ "time": 14.4
+ },
+ {
+ "epoch": 2.66,
+ "learning_rate": "1.5308e-04",
+ "loss": 0.7057,
+ "slid_loss": 0.735,
+ "step": 2771,
+ "time": 13.97
+ },
+ {
+ "epoch": 2.66,
+ "learning_rate": "1.5305e-04",
+ "loss": 0.7315,
+ "slid_loss": 0.735,
+ "step": 2772,
+ "time": 12.06
+ },
+ {
+ "epoch": 2.66,
+ "learning_rate": "1.5302e-04",
+ "loss": 0.7345,
+ "slid_loss": 0.7344,
+ "step": 2773,
+ "time": 11.48
+ },
+ {
+ "epoch": 2.66,
+ "learning_rate": "1.5298e-04",
+ "loss": 0.6731,
+ "slid_loss": 0.7337,
+ "step": 2774,
+ "time": 13.87
+ },
+ {
+ "epoch": 2.67,
+ "learning_rate": "1.5295e-04",
+ "loss": 0.6577,
+ "slid_loss": 0.733,
+ "step": 2775,
+ "time": 13.4
+ },
+ {
+ "epoch": 2.67,
+ "learning_rate": "1.5292e-04",
+ "loss": 0.7373,
+ "slid_loss": 0.7316,
+ "step": 2776,
+ "time": 13.4
+ },
+ {
+ "epoch": 2.67,
+ "learning_rate": "1.5288e-04",
+ "loss": 0.7546,
+ "slid_loss": 0.7315,
+ "step": 2777,
+ "time": 13.41
+ },
+ {
+ "epoch": 2.67,
+ "learning_rate": "1.5285e-04",
+ "loss": 0.632,
+ "slid_loss": 0.7288,
+ "step": 2778,
+ "time": 12.21
+ },
+ {
+ "epoch": 2.67,
+ "learning_rate": "1.5282e-04",
+ "loss": 0.7545,
+ "slid_loss": 0.729,
+ "step": 2779,
+ "time": 12.89
+ },
+ {
+ "epoch": 2.67,
+ "learning_rate": "1.5278e-04",
+ "loss": 0.7351,
+ "slid_loss": 0.7294,
+ "step": 2780,
+ "time": 14.14
+ },
+ {
+ "epoch": 2.67,
+ "learning_rate": "1.5275e-04",
+ "loss": 0.7375,
+ "slid_loss": 0.7296,
+ "step": 2781,
+ "time": 13.26
+ },
+ {
+ "epoch": 2.67,
+ "learning_rate": "1.5272e-04",
+ "loss": 0.7292,
+ "slid_loss": 0.7296,
+ "step": 2782,
+ "time": 12.92
+ },
+ {
+ "epoch": 2.67,
+ "learning_rate": "1.5268e-04",
+ "loss": 0.721,
+ "slid_loss": 0.7295,
+ "step": 2783,
+ "time": 13.45
+ },
+ {
+ "epoch": 2.67,
+ "learning_rate": "1.5265e-04",
+ "loss": 0.727,
+ "slid_loss": 0.7305,
+ "step": 2784,
+ "time": 11.08
+ },
+ {
+ "epoch": 2.68,
+ "learning_rate": "1.5261e-04",
+ "loss": 0.6365,
+ "slid_loss": 0.7301,
+ "step": 2785,
+ "time": 13.73
+ },
+ {
+ "epoch": 2.68,
+ "learning_rate": "1.5258e-04",
+ "loss": 0.7794,
+ "slid_loss": 0.7295,
+ "step": 2786,
+ "time": 12.68
+ },
+ {
+ "epoch": 2.68,
+ "learning_rate": "1.5255e-04",
+ "loss": 0.7507,
+ "slid_loss": 0.73,
+ "step": 2787,
+ "time": 12.89
+ },
+ {
+ "epoch": 2.68,
+ "learning_rate": "1.5251e-04",
+ "loss": 0.716,
+ "slid_loss": 0.7302,
+ "step": 2788,
+ "time": 13.66
+ },
+ {
+ "epoch": 2.68,
+ "learning_rate": "1.5248e-04",
+ "loss": 0.7236,
+ "slid_loss": 0.7303,
+ "step": 2789,
+ "time": 12.43
+ },
+ {
+ "epoch": 2.68,
+ "learning_rate": "1.5245e-04",
+ "loss": 0.755,
+ "slid_loss": 0.7311,
+ "step": 2790,
+ "time": 13.33
+ },
+ {
+ "epoch": 2.68,
+ "learning_rate": "1.5241e-04",
+ "loss": 0.7382,
+ "slid_loss": 0.7307,
+ "step": 2791,
+ "time": 13.39
+ },
+ {
+ "epoch": 2.68,
+ "learning_rate": "1.5238e-04",
+ "loss": 0.6568,
+ "slid_loss": 0.7299,
+ "step": 2792,
+ "time": 13.3
+ },
+ {
+ "epoch": 2.68,
+ "learning_rate": "1.5235e-04",
+ "loss": 0.7227,
+ "slid_loss": 0.7294,
+ "step": 2793,
+ "time": 12.2
+ },
+ {
+ "epoch": 2.68,
+ "learning_rate": "1.5231e-04",
+ "loss": 0.7255,
+ "slid_loss": 0.7301,
+ "step": 2794,
+ "time": 11.74
+ },
+ {
+ "epoch": 2.68,
+ "learning_rate": "1.5228e-04",
+ "loss": 0.709,
+ "slid_loss": 0.7303,
+ "step": 2795,
+ "time": 12.85
+ },
+ {
+ "epoch": 2.69,
+ "learning_rate": "1.5225e-04",
+ "loss": 0.8038,
+ "slid_loss": 0.7305,
+ "step": 2796,
+ "time": 11.36
+ },
+ {
+ "epoch": 2.69,
+ "learning_rate": "1.5221e-04",
+ "loss": 0.8184,
+ "slid_loss": 0.7324,
+ "step": 2797,
+ "time": 14.33
+ },
+ {
+ "epoch": 2.69,
+ "learning_rate": "1.5218e-04",
+ "loss": 0.6162,
+ "slid_loss": 0.7316,
+ "step": 2798,
+ "time": 13.86
+ },
+ {
+ "epoch": 2.69,
+ "learning_rate": "1.5215e-04",
+ "loss": 0.799,
+ "slid_loss": 0.7319,
+ "step": 2799,
+ "time": 11.63
+ },
+ {
+ "epoch": 2.69,
+ "learning_rate": "1.5211e-04",
+ "loss": 0.6719,
+ "slid_loss": 0.7319,
+ "step": 2800,
+ "time": 13.03
+ },
+ {
+ "epoch": 2.69,
+ "learning_rate": "1.5208e-04",
+ "loss": 0.702,
+ "slid_loss": 0.7302,
+ "step": 2801,
+ "time": 13.73
+ },
+ {
+ "epoch": 2.69,
+ "learning_rate": "1.5205e-04",
+ "loss": 0.6694,
+ "slid_loss": 0.7299,
+ "step": 2802,
+ "time": 13.86
+ },
+ {
+ "epoch": 2.69,
+ "learning_rate": "1.5201e-04",
+ "loss": 0.6669,
+ "slid_loss": 0.7283,
+ "step": 2803,
+ "time": 12.87
+ },
+ {
+ "epoch": 2.69,
+ "learning_rate": "1.5198e-04",
+ "loss": 0.7653,
+ "slid_loss": 0.7287,
+ "step": 2804,
+ "time": 13.45
+ },
+ {
+ "epoch": 2.69,
+ "learning_rate": "1.5194e-04",
+ "loss": 0.728,
+ "slid_loss": 0.7286,
+ "step": 2805,
+ "time": 12.89
+ },
+ {
+ "epoch": 2.7,
+ "learning_rate": "1.5191e-04",
+ "loss": 0.7077,
+ "slid_loss": 0.7291,
+ "step": 2806,
+ "time": 13.18
+ },
+ {
+ "epoch": 2.7,
+ "learning_rate": "1.5188e-04",
+ "loss": 0.7786,
+ "slid_loss": 0.7292,
+ "step": 2807,
+ "time": 11.83
+ },
+ {
+ "epoch": 2.7,
+ "learning_rate": "1.5184e-04",
+ "loss": 0.6313,
+ "slid_loss": 0.7274,
+ "step": 2808,
+ "time": 12.81
+ },
+ {
+ "epoch": 2.7,
+ "learning_rate": "1.5181e-04",
+ "loss": 0.7382,
+ "slid_loss": 0.7274,
+ "step": 2809,
+ "time": 14.42
+ },
+ {
+ "epoch": 2.7,
+ "learning_rate": "1.5178e-04",
+ "loss": 0.7741,
+ "slid_loss": 0.7269,
+ "step": 2810,
+ "time": 13.27
+ },
+ {
+ "epoch": 2.7,
+ "learning_rate": "1.5174e-04",
+ "loss": 0.7427,
+ "slid_loss": 0.7279,
+ "step": 2811,
+ "time": 12.01
+ },
+ {
+ "epoch": 2.7,
+ "learning_rate": "1.5171e-04",
+ "loss": 0.6997,
+ "slid_loss": 0.7266,
+ "step": 2812,
+ "time": 11.5
+ },
+ {
+ "epoch": 2.7,
+ "learning_rate": "1.5168e-04",
+ "loss": 0.7278,
+ "slid_loss": 0.7261,
+ "step": 2813,
+ "time": 13.17
+ },
+ {
+ "epoch": 2.7,
+ "learning_rate": "1.5164e-04",
+ "loss": 0.7322,
+ "slid_loss": 0.7267,
+ "step": 2814,
+ "time": 13.01
+ },
+ {
+ "epoch": 2.7,
+ "learning_rate": "1.5161e-04",
+ "loss": 0.7417,
+ "slid_loss": 0.727,
+ "step": 2815,
+ "time": 13.39
+ },
+ {
+ "epoch": 2.71,
+ "learning_rate": "1.5158e-04",
+ "loss": 0.6702,
+ "slid_loss": 0.7272,
+ "step": 2816,
+ "time": 12.71
+ },
+ {
+ "epoch": 2.71,
+ "learning_rate": "1.5154e-04",
+ "loss": 0.7166,
+ "slid_loss": 0.7275,
+ "step": 2817,
+ "time": 14.09
+ },
+ {
+ "epoch": 2.71,
+ "learning_rate": "1.5151e-04",
+ "loss": 0.7633,
+ "slid_loss": 0.7281,
+ "step": 2818,
+ "time": 11.45
+ },
+ {
+ "epoch": 2.71,
+ "learning_rate": "1.5148e-04",
+ "loss": 0.8231,
+ "slid_loss": 0.7297,
+ "step": 2819,
+ "time": 12.11
+ },
+ {
+ "epoch": 2.71,
+ "learning_rate": "1.5144e-04",
+ "loss": 0.7897,
+ "slid_loss": 0.7306,
+ "step": 2820,
+ "time": 13.19
+ },
+ {
+ "epoch": 2.71,
+ "learning_rate": "1.5141e-04",
+ "loss": 0.6531,
+ "slid_loss": 0.7301,
+ "step": 2821,
+ "time": 13.76
+ },
+ {
+ "epoch": 2.71,
+ "learning_rate": "1.5137e-04",
+ "loss": 0.7633,
+ "slid_loss": 0.7305,
+ "step": 2822,
+ "time": 13.58
+ },
+ {
+ "epoch": 2.71,
+ "learning_rate": "1.5134e-04",
+ "loss": 0.7941,
+ "slid_loss": 0.731,
+ "step": 2823,
+ "time": 12.29
+ },
+ {
+ "epoch": 2.71,
+ "learning_rate": "1.5131e-04",
+ "loss": 0.7046,
+ "slid_loss": 0.7306,
+ "step": 2824,
+ "time": 14.25
+ },
+ {
+ "epoch": 2.71,
+ "learning_rate": "1.5127e-04",
+ "loss": 0.6832,
+ "slid_loss": 0.7306,
+ "step": 2825,
+ "time": 12.21
+ },
+ {
+ "epoch": 2.71,
+ "learning_rate": "1.5124e-04",
+ "loss": 0.7824,
+ "slid_loss": 0.7302,
+ "step": 2826,
+ "time": 12.01
+ },
+ {
+ "epoch": 2.72,
+ "learning_rate": "1.5121e-04",
+ "loss": 0.6516,
+ "slid_loss": 0.7294,
+ "step": 2827,
+ "time": 11.27
+ },
+ {
+ "epoch": 2.72,
+ "learning_rate": "1.5117e-04",
+ "loss": 0.8109,
+ "slid_loss": 0.7311,
+ "step": 2828,
+ "time": 13.88
+ },
+ {
+ "epoch": 2.72,
+ "learning_rate": "1.5114e-04",
+ "loss": 0.6289,
+ "slid_loss": 0.7293,
+ "step": 2829,
+ "time": 13.57
+ },
+ {
+ "epoch": 2.72,
+ "learning_rate": "1.5111e-04",
+ "loss": 0.6821,
+ "slid_loss": 0.7292,
+ "step": 2830,
+ "time": 12.54
+ },
+ {
+ "epoch": 2.72,
+ "learning_rate": "1.5107e-04",
+ "loss": 0.7099,
+ "slid_loss": 0.7298,
+ "step": 2831,
+ "time": 14.01
+ },
+ {
+ "epoch": 2.72,
+ "learning_rate": "1.5104e-04",
+ "loss": 0.7555,
+ "slid_loss": 0.7304,
+ "step": 2832,
+ "time": 12.95
+ },
+ {
+ "epoch": 2.72,
+ "learning_rate": "1.5101e-04",
+ "loss": 0.7428,
+ "slid_loss": 0.7301,
+ "step": 2833,
+ "time": 14.97
+ },
+ {
+ "epoch": 2.72,
+ "learning_rate": "1.5097e-04",
+ "loss": 0.6829,
+ "slid_loss": 0.7299,
+ "step": 2834,
+ "time": 13.71
+ },
+ {
+ "epoch": 2.72,
+ "learning_rate": "1.5094e-04",
+ "loss": 0.7564,
+ "slid_loss": 0.7305,
+ "step": 2835,
+ "time": 12.17
+ },
+ {
+ "epoch": 2.72,
+ "learning_rate": "1.5091e-04",
+ "loss": 0.7232,
+ "slid_loss": 0.7291,
+ "step": 2836,
+ "time": 14.43
+ },
+ {
+ "epoch": 2.73,
+ "learning_rate": "1.5087e-04",
+ "loss": 0.6621,
+ "slid_loss": 0.7277,
+ "step": 2837,
+ "time": 13.63
+ },
+ {
+ "epoch": 2.73,
+ "learning_rate": "1.5084e-04",
+ "loss": 0.7093,
+ "slid_loss": 0.7269,
+ "step": 2838,
+ "time": 14.17
+ },
+ {
+ "epoch": 2.73,
+ "learning_rate": "1.5080e-04",
+ "loss": 0.7745,
+ "slid_loss": 0.7277,
+ "step": 2839,
+ "time": 13.44
+ },
+ {
+ "epoch": 2.73,
+ "learning_rate": "1.5077e-04",
+ "loss": 0.6835,
+ "slid_loss": 0.728,
+ "step": 2840,
+ "time": 13.7
+ },
+ {
+ "epoch": 2.73,
+ "learning_rate": "1.5074e-04",
+ "loss": 0.69,
+ "slid_loss": 0.7268,
+ "step": 2841,
+ "time": 12.87
+ },
+ {
+ "epoch": 2.73,
+ "learning_rate": "1.5070e-04",
+ "loss": 0.7674,
+ "slid_loss": 0.7274,
+ "step": 2842,
+ "time": 11.53
+ },
+ {
+ "epoch": 2.73,
+ "learning_rate": "1.5067e-04",
+ "loss": 0.7321,
+ "slid_loss": 0.7273,
+ "step": 2843,
+ "time": 13.17
+ },
+ {
+ "epoch": 2.73,
+ "learning_rate": "1.5064e-04",
+ "loss": 0.7813,
+ "slid_loss": 0.7269,
+ "step": 2844,
+ "time": 13.53
+ },
+ {
+ "epoch": 2.73,
+ "learning_rate": "1.5060e-04",
+ "loss": 0.7012,
+ "slid_loss": 0.7266,
+ "step": 2845,
+ "time": 13.71
+ },
+ {
+ "epoch": 2.73,
+ "learning_rate": "1.5057e-04",
+ "loss": 0.6337,
+ "slid_loss": 0.7255,
+ "step": 2846,
+ "time": 12.08
+ },
+ {
+ "epoch": 2.73,
+ "learning_rate": "1.5054e-04",
+ "loss": 0.743,
+ "slid_loss": 0.7259,
+ "step": 2847,
+ "time": 11.94
+ },
+ {
+ "epoch": 2.74,
+ "learning_rate": "1.5050e-04",
+ "loss": 0.7596,
+ "slid_loss": 0.7267,
+ "step": 2848,
+ "time": 14.0
+ },
+ {
+ "epoch": 2.74,
+ "learning_rate": "1.5047e-04",
+ "loss": 0.7029,
+ "slid_loss": 0.7251,
+ "step": 2849,
+ "time": 13.99
+ },
+ {
+ "epoch": 2.74,
+ "learning_rate": "1.5044e-04",
+ "loss": 0.6916,
+ "slid_loss": 0.7241,
+ "step": 2850,
+ "time": 15.0
+ },
+ {
+ "epoch": 2.74,
+ "learning_rate": "1.5040e-04",
+ "loss": 0.7159,
+ "slid_loss": 0.7237,
+ "step": 2851,
+ "time": 12.12
+ },
+ {
+ "epoch": 2.74,
+ "learning_rate": "1.5037e-04",
+ "loss": 0.796,
+ "slid_loss": 0.7241,
+ "step": 2852,
+ "time": 13.16
+ },
+ {
+ "epoch": 2.74,
+ "learning_rate": "1.5034e-04",
+ "loss": 0.8071,
+ "slid_loss": 0.7251,
+ "step": 2853,
+ "time": 13.94
+ },
+ {
+ "epoch": 2.74,
+ "learning_rate": "1.5030e-04",
+ "loss": 0.586,
+ "slid_loss": 0.7228,
+ "step": 2854,
+ "time": 14.34
+ },
+ {
+ "epoch": 2.74,
+ "learning_rate": "1.5027e-04",
+ "loss": 0.8893,
+ "slid_loss": 0.7251,
+ "step": 2855,
+ "time": 12.82
+ },
+ {
+ "epoch": 2.74,
+ "learning_rate": "1.5023e-04",
+ "loss": 0.694,
+ "slid_loss": 0.7248,
+ "step": 2856,
+ "time": 13.42
+ },
+ {
+ "epoch": 2.74,
+ "learning_rate": "1.5020e-04",
+ "loss": 0.7535,
+ "slid_loss": 0.7245,
+ "step": 2857,
+ "time": 13.67
+ },
+ {
+ "epoch": 2.75,
+ "learning_rate": "1.5017e-04",
+ "loss": 0.6745,
+ "slid_loss": 0.7241,
+ "step": 2858,
+ "time": 13.75
+ },
+ {
+ "epoch": 2.75,
+ "learning_rate": "1.5013e-04",
+ "loss": 0.7289,
+ "slid_loss": 0.7243,
+ "step": 2859,
+ "time": 13.41
+ },
+ {
+ "epoch": 2.75,
+ "learning_rate": "1.5010e-04",
+ "loss": 0.7481,
+ "slid_loss": 0.7254,
+ "step": 2860,
+ "time": 11.45
+ },
+ {
+ "epoch": 2.75,
+ "learning_rate": "1.5007e-04",
+ "loss": 0.7399,
+ "slid_loss": 0.7258,
+ "step": 2861,
+ "time": 12.86
+ },
+ {
+ "epoch": 2.75,
+ "learning_rate": "1.5003e-04",
+ "loss": 0.7397,
+ "slid_loss": 0.7258,
+ "step": 2862,
+ "time": 13.26
+ },
+ {
+ "epoch": 2.75,
+ "learning_rate": "1.5000e-04",
+ "loss": 0.7001,
+ "slid_loss": 0.7252,
+ "step": 2863,
+ "time": 13.23
+ },
+ {
+ "epoch": 2.75,
+ "learning_rate": "1.4997e-04",
+ "loss": 0.7309,
+ "slid_loss": 0.724,
+ "step": 2864,
+ "time": 14.05
+ },
+ {
+ "epoch": 2.75,
+ "learning_rate": "1.4993e-04",
+ "loss": 0.687,
+ "slid_loss": 0.7232,
+ "step": 2865,
+ "time": 13.37
+ },
+ {
+ "epoch": 2.75,
+ "learning_rate": "1.4990e-04",
+ "loss": 0.7897,
+ "slid_loss": 0.7237,
+ "step": 2866,
+ "time": 13.66
+ },
+ {
+ "epoch": 2.75,
+ "learning_rate": "1.4987e-04",
+ "loss": 0.7596,
+ "slid_loss": 0.7243,
+ "step": 2867,
+ "time": 13.34
+ },
+ {
+ "epoch": 2.76,
+ "learning_rate": "1.4983e-04",
+ "loss": 0.6284,
+ "slid_loss": 0.7244,
+ "step": 2868,
+ "time": 13.55
+ },
+ {
+ "epoch": 2.76,
+ "learning_rate": "1.4980e-04",
+ "loss": 0.6644,
+ "slid_loss": 0.7234,
+ "step": 2869,
+ "time": 14.02
+ },
+ {
+ "epoch": 2.76,
+ "learning_rate": "1.4977e-04",
+ "loss": 0.7921,
+ "slid_loss": 0.724,
+ "step": 2870,
+ "time": 13.56
+ },
+ {
+ "epoch": 2.76,
+ "learning_rate": "1.4973e-04",
+ "loss": 0.7336,
+ "slid_loss": 0.7243,
+ "step": 2871,
+ "time": 11.18
+ },
+ {
+ "epoch": 2.76,
+ "learning_rate": "1.4970e-04",
+ "loss": 0.6055,
+ "slid_loss": 0.7231,
+ "step": 2872,
+ "time": 13.62
+ },
+ {
+ "epoch": 2.76,
+ "learning_rate": "1.4966e-04",
+ "loss": 0.6651,
+ "slid_loss": 0.7224,
+ "step": 2873,
+ "time": 12.01
+ },
+ {
+ "epoch": 2.76,
+ "learning_rate": "1.4963e-04",
+ "loss": 0.7794,
+ "slid_loss": 0.7234,
+ "step": 2874,
+ "time": 13.64
+ },
+ {
+ "epoch": 2.76,
+ "learning_rate": "1.4960e-04",
+ "loss": 0.6732,
+ "slid_loss": 0.7236,
+ "step": 2875,
+ "time": 13.85
+ },
+ {
+ "epoch": 2.76,
+ "learning_rate": "1.4956e-04",
+ "loss": 0.735,
+ "slid_loss": 0.7236,
+ "step": 2876,
+ "time": 12.95
+ },
+ {
+ "epoch": 2.76,
+ "learning_rate": "1.4953e-04",
+ "loss": 0.7031,
+ "slid_loss": 0.723,
+ "step": 2877,
+ "time": 13.45
+ },
+ {
+ "epoch": 2.76,
+ "learning_rate": "1.4950e-04",
+ "loss": 0.7276,
+ "slid_loss": 0.724,
+ "step": 2878,
+ "time": 13.15
+ },
+ {
+ "epoch": 2.77,
+ "learning_rate": "1.4946e-04",
+ "loss": 0.7249,
+ "slid_loss": 0.7237,
+ "step": 2879,
+ "time": 13.37
+ },
+ {
+ "epoch": 2.77,
+ "learning_rate": "1.4943e-04",
+ "loss": 0.6236,
+ "slid_loss": 0.7226,
+ "step": 2880,
+ "time": 12.91
+ },
+ {
+ "epoch": 2.77,
+ "learning_rate": "1.4940e-04",
+ "loss": 0.7623,
+ "slid_loss": 0.7228,
+ "step": 2881,
+ "time": 11.95
+ },
+ {
+ "epoch": 2.77,
+ "learning_rate": "1.4936e-04",
+ "loss": 0.6503,
+ "slid_loss": 0.7221,
+ "step": 2882,
+ "time": 12.74
+ },
+ {
+ "epoch": 2.77,
+ "learning_rate": "1.4933e-04",
+ "loss": 0.7243,
+ "slid_loss": 0.7221,
+ "step": 2883,
+ "time": 12.12
+ },
+ {
+ "epoch": 2.77,
+ "learning_rate": "1.4930e-04",
+ "loss": 0.6631,
+ "slid_loss": 0.7214,
+ "step": 2884,
+ "time": 13.81
+ },
+ {
+ "epoch": 2.77,
+ "learning_rate": "1.4926e-04",
+ "loss": 0.6918,
+ "slid_loss": 0.722,
+ "step": 2885,
+ "time": 13.71
+ },
+ {
+ "epoch": 2.77,
+ "learning_rate": "1.4923e-04",
+ "loss": 0.629,
+ "slid_loss": 0.7205,
+ "step": 2886,
+ "time": 12.77
+ },
+ {
+ "epoch": 2.77,
+ "learning_rate": "1.4920e-04",
+ "loss": 0.7635,
+ "slid_loss": 0.7206,
+ "step": 2887,
+ "time": 13.13
+ },
+ {
+ "epoch": 2.77,
+ "learning_rate": "1.4916e-04",
+ "loss": 0.6988,
+ "slid_loss": 0.7205,
+ "step": 2888,
+ "time": 12.67
+ },
+ {
+ "epoch": 2.78,
+ "learning_rate": "1.4913e-04",
+ "loss": 0.735,
+ "slid_loss": 0.7206,
+ "step": 2889,
+ "time": 12.66
+ },
+ {
+ "epoch": 2.78,
+ "learning_rate": "1.4909e-04",
+ "loss": 0.7766,
+ "slid_loss": 0.7208,
+ "step": 2890,
+ "time": 13.48
+ },
+ {
+ "epoch": 2.78,
+ "learning_rate": "1.4906e-04",
+ "loss": 0.6878,
+ "slid_loss": 0.7203,
+ "step": 2891,
+ "time": 14.06
+ },
+ {
+ "epoch": 2.78,
+ "learning_rate": "1.4903e-04",
+ "loss": 0.6556,
+ "slid_loss": 0.7203,
+ "step": 2892,
+ "time": 13.37
+ },
+ {
+ "epoch": 2.78,
+ "learning_rate": "1.4899e-04",
+ "loss": 0.6911,
+ "slid_loss": 0.7199,
+ "step": 2893,
+ "time": 12.88
+ },
+ {
+ "epoch": 2.78,
+ "learning_rate": "1.4896e-04",
+ "loss": 0.7247,
+ "slid_loss": 0.7199,
+ "step": 2894,
+ "time": 13.39
+ },
+ {
+ "epoch": 2.78,
+ "learning_rate": "1.4893e-04",
+ "loss": 0.6661,
+ "slid_loss": 0.7195,
+ "step": 2895,
+ "time": 10.82
+ },
+ {
+ "epoch": 2.78,
+ "learning_rate": "1.4889e-04",
+ "loss": 0.7388,
+ "slid_loss": 0.7189,
+ "step": 2896,
+ "time": 12.81
+ },
+ {
+ "epoch": 2.78,
+ "learning_rate": "1.4886e-04",
+ "loss": 0.7086,
+ "slid_loss": 0.7178,
+ "step": 2897,
+ "time": 12.5
+ },
+ {
+ "epoch": 2.78,
+ "learning_rate": "1.4883e-04",
+ "loss": 0.6925,
+ "slid_loss": 0.7185,
+ "step": 2898,
+ "time": 13.79
+ },
+ {
+ "epoch": 2.78,
+ "learning_rate": "1.4879e-04",
+ "loss": 0.6242,
+ "slid_loss": 0.7168,
+ "step": 2899,
+ "time": 13.29
+ },
+ {
+ "epoch": 2.79,
+ "learning_rate": "1.4876e-04",
+ "loss": 0.7268,
+ "slid_loss": 0.7173,
+ "step": 2900,
+ "time": 11.32
+ },
+ {
+ "epoch": 2.79,
+ "learning_rate": "1.4873e-04",
+ "loss": 0.6174,
+ "slid_loss": 0.7165,
+ "step": 2901,
+ "time": 13.53
+ },
+ {
+ "epoch": 2.79,
+ "learning_rate": "1.4869e-04",
+ "loss": 0.7558,
+ "slid_loss": 0.7173,
+ "step": 2902,
+ "time": 11.51
+ },
+ {
+ "epoch": 2.79,
+ "learning_rate": "1.4866e-04",
+ "loss": 0.7052,
+ "slid_loss": 0.7177,
+ "step": 2903,
+ "time": 12.78
+ },
+ {
+ "epoch": 2.79,
+ "learning_rate": "1.4863e-04",
+ "loss": 0.7868,
+ "slid_loss": 0.7179,
+ "step": 2904,
+ "time": 12.42
+ },
+ {
+ "epoch": 2.79,
+ "learning_rate": "1.4859e-04",
+ "loss": 0.6797,
+ "slid_loss": 0.7175,
+ "step": 2905,
+ "time": 13.65
+ },
+ {
+ "epoch": 2.79,
+ "learning_rate": "1.4856e-04",
+ "loss": 0.7142,
+ "slid_loss": 0.7175,
+ "step": 2906,
+ "time": 13.45
+ },
+ {
+ "epoch": 2.79,
+ "learning_rate": "1.4852e-04",
+ "loss": 0.6828,
+ "slid_loss": 0.7166,
+ "step": 2907,
+ "time": 11.47
+ },
+ {
+ "epoch": 2.79,
+ "learning_rate": "1.4849e-04",
+ "loss": 0.6812,
+ "slid_loss": 0.7171,
+ "step": 2908,
+ "time": 12.98
+ },
+ {
+ "epoch": 2.79,
+ "learning_rate": "1.4846e-04",
+ "loss": 0.7057,
+ "slid_loss": 0.7167,
+ "step": 2909,
+ "time": 12.96
+ },
+ {
+ "epoch": 2.8,
+ "learning_rate": "1.4842e-04",
+ "loss": 0.663,
+ "slid_loss": 0.7156,
+ "step": 2910,
+ "time": 12.82
+ },
+ {
+ "epoch": 2.8,
+ "learning_rate": "1.4839e-04",
+ "loss": 0.6505,
+ "slid_loss": 0.7147,
+ "step": 2911,
+ "time": 13.29
+ },
+ {
+ "epoch": 2.8,
+ "learning_rate": "1.4836e-04",
+ "loss": 0.7331,
+ "slid_loss": 0.715,
+ "step": 2912,
+ "time": 12.89
+ },
+ {
+ "epoch": 2.8,
+ "learning_rate": "1.4832e-04",
+ "loss": 0.6837,
+ "slid_loss": 0.7146,
+ "step": 2913,
+ "time": 13.24
+ },
+ {
+ "epoch": 2.8,
+ "learning_rate": "1.4829e-04",
+ "loss": 0.6864,
+ "slid_loss": 0.7141,
+ "step": 2914,
+ "time": 13.55
+ },
+ {
+ "epoch": 2.8,
+ "learning_rate": "1.4826e-04",
+ "loss": 0.7246,
+ "slid_loss": 0.714,
+ "step": 2915,
+ "time": 13.22
+ },
+ {
+ "epoch": 2.8,
+ "learning_rate": "1.4822e-04",
+ "loss": 0.621,
+ "slid_loss": 0.7135,
+ "step": 2916,
+ "time": 13.33
+ },
+ {
+ "epoch": 2.8,
+ "learning_rate": "1.4819e-04",
+ "loss": 0.6926,
+ "slid_loss": 0.7132,
+ "step": 2917,
+ "time": 13.64
+ },
+ {
+ "epoch": 2.8,
+ "learning_rate": "1.4816e-04",
+ "loss": 0.695,
+ "slid_loss": 0.7126,
+ "step": 2918,
+ "time": 14.37
+ },
+ {
+ "epoch": 2.8,
+ "learning_rate": "1.4812e-04",
+ "loss": 0.6725,
+ "slid_loss": 0.711,
+ "step": 2919,
+ "time": 13.52
+ },
+ {
+ "epoch": 2.8,
+ "learning_rate": "1.4809e-04",
+ "loss": 0.8606,
+ "slid_loss": 0.7118,
+ "step": 2920,
+ "time": 13.16
+ },
+ {
+ "epoch": 2.81,
+ "learning_rate": "1.4806e-04",
+ "loss": 0.6707,
+ "slid_loss": 0.7119,
+ "step": 2921,
+ "time": 13.06
+ },
+ {
+ "epoch": 2.81,
+ "learning_rate": "1.4802e-04",
+ "loss": 0.7499,
+ "slid_loss": 0.7118,
+ "step": 2922,
+ "time": 11.85
+ },
+ {
+ "epoch": 2.81,
+ "learning_rate": "1.4799e-04",
+ "loss": 0.7896,
+ "slid_loss": 0.7118,
+ "step": 2923,
+ "time": 12.85
+ },
+ {
+ "epoch": 2.81,
+ "learning_rate": "1.4795e-04",
+ "loss": 0.6163,
+ "slid_loss": 0.7109,
+ "step": 2924,
+ "time": 12.59
+ },
+ {
+ "epoch": 2.81,
+ "learning_rate": "1.4792e-04",
+ "loss": 0.6991,
+ "slid_loss": 0.711,
+ "step": 2925,
+ "time": 13.79
+ },
+ {
+ "epoch": 2.81,
+ "learning_rate": "1.4789e-04",
+ "loss": 0.7284,
+ "slid_loss": 0.7105,
+ "step": 2926,
+ "time": 11.4
+ },
+ {
+ "epoch": 2.81,
+ "learning_rate": "1.4785e-04",
+ "loss": 0.6735,
+ "slid_loss": 0.7107,
+ "step": 2927,
+ "time": 13.12
+ },
+ {
+ "epoch": 2.81,
+ "learning_rate": "1.4782e-04",
+ "loss": 0.7231,
+ "slid_loss": 0.7098,
+ "step": 2928,
+ "time": 14.26
+ },
+ {
+ "epoch": 2.81,
+ "learning_rate": "1.4779e-04",
+ "loss": 0.7292,
+ "slid_loss": 0.7108,
+ "step": 2929,
+ "time": 13.19
+ },
+ {
+ "epoch": 2.81,
+ "learning_rate": "1.4775e-04",
+ "loss": 0.7119,
+ "slid_loss": 0.7111,
+ "step": 2930,
+ "time": 13.31
+ },
+ {
+ "epoch": 2.82,
+ "learning_rate": "1.4772e-04",
+ "loss": 0.684,
+ "slid_loss": 0.7109,
+ "step": 2931,
+ "time": 13.0
+ },
+ {
+ "epoch": 2.82,
+ "learning_rate": "1.4769e-04",
+ "loss": 0.7878,
+ "slid_loss": 0.7112,
+ "step": 2932,
+ "time": 13.84
+ },
+ {
+ "epoch": 2.82,
+ "learning_rate": "1.4765e-04",
+ "loss": 0.74,
+ "slid_loss": 0.7112,
+ "step": 2933,
+ "time": 13.31
+ },
+ {
+ "epoch": 2.82,
+ "learning_rate": "1.4762e-04",
+ "loss": 0.5858,
+ "slid_loss": 0.7102,
+ "step": 2934,
+ "time": 12.85
+ },
+ {
+ "epoch": 2.82,
+ "learning_rate": "1.4759e-04",
+ "loss": 0.7487,
+ "slid_loss": 0.7101,
+ "step": 2935,
+ "time": 13.49
+ },
+ {
+ "epoch": 2.82,
+ "learning_rate": "1.4755e-04",
+ "loss": 0.7344,
+ "slid_loss": 0.7102,
+ "step": 2936,
+ "time": 14.17
+ },
+ {
+ "epoch": 2.82,
+ "learning_rate": "1.4752e-04",
+ "loss": 0.6938,
+ "slid_loss": 0.7105,
+ "step": 2937,
+ "time": 10.98
+ },
+ {
+ "epoch": 2.82,
+ "learning_rate": "1.4749e-04",
+ "loss": 0.6819,
+ "slid_loss": 0.7103,
+ "step": 2938,
+ "time": 13.84
+ },
+ {
+ "epoch": 2.82,
+ "learning_rate": "1.4745e-04",
+ "loss": 0.6859,
+ "slid_loss": 0.7094,
+ "step": 2939,
+ "time": 11.31
+ },
+ {
+ "epoch": 2.82,
+ "learning_rate": "1.4742e-04",
+ "loss": 0.5853,
+ "slid_loss": 0.7084,
+ "step": 2940,
+ "time": 11.92
+ },
+ {
+ "epoch": 2.83,
+ "learning_rate": "1.4739e-04",
+ "loss": 0.7303,
+ "slid_loss": 0.7088,
+ "step": 2941,
+ "time": 12.93
+ },
+ {
+ "epoch": 2.83,
+ "learning_rate": "1.4735e-04",
+ "loss": 0.7024,
+ "slid_loss": 0.7082,
+ "step": 2942,
+ "time": 11.59
+ },
+ {
+ "epoch": 2.83,
+ "learning_rate": "1.4732e-04",
+ "loss": 0.7075,
+ "slid_loss": 0.7079,
+ "step": 2943,
+ "time": 13.66
+ },
+ {
+ "epoch": 2.83,
+ "learning_rate": "1.4728e-04",
+ "loss": 0.6617,
+ "slid_loss": 0.7067,
+ "step": 2944,
+ "time": 13.87
+ },
+ {
+ "epoch": 2.83,
+ "learning_rate": "1.4725e-04",
+ "loss": 0.7589,
+ "slid_loss": 0.7073,
+ "step": 2945,
+ "time": 13.22
+ },
+ {
+ "epoch": 2.83,
+ "learning_rate": "1.4722e-04",
+ "loss": 0.638,
+ "slid_loss": 0.7073,
+ "step": 2946,
+ "time": 11.57
+ },
+ {
+ "epoch": 2.83,
+ "learning_rate": "1.4718e-04",
+ "loss": 0.6649,
+ "slid_loss": 0.7066,
+ "step": 2947,
+ "time": 13.47
+ },
+ {
+ "epoch": 2.83,
+ "learning_rate": "1.4715e-04",
+ "loss": 0.7498,
+ "slid_loss": 0.7065,
+ "step": 2948,
+ "time": 12.92
+ },
+ {
+ "epoch": 2.83,
+ "learning_rate": "1.4712e-04",
+ "loss": 0.6483,
+ "slid_loss": 0.7059,
+ "step": 2949,
+ "time": 12.25
+ },
+ {
+ "epoch": 2.83,
+ "learning_rate": "1.4708e-04",
+ "loss": 0.7475,
+ "slid_loss": 0.7065,
+ "step": 2950,
+ "time": 11.68
+ },
+ {
+ "epoch": 2.83,
+ "learning_rate": "1.4705e-04",
+ "loss": 0.7781,
+ "slid_loss": 0.7071,
+ "step": 2951,
+ "time": 13.96
+ },
+ {
+ "epoch": 2.84,
+ "learning_rate": "1.4702e-04",
+ "loss": 0.6448,
+ "slid_loss": 0.7056,
+ "step": 2952,
+ "time": 11.04
+ },
+ {
+ "epoch": 2.84,
+ "learning_rate": "1.4698e-04",
+ "loss": 0.6974,
+ "slid_loss": 0.7045,
+ "step": 2953,
+ "time": 11.26
+ },
+ {
+ "epoch": 2.84,
+ "learning_rate": "1.4695e-04",
+ "loss": 0.6827,
+ "slid_loss": 0.7055,
+ "step": 2954,
+ "time": 12.74
+ },
+ {
+ "epoch": 2.84,
+ "learning_rate": "1.4692e-04",
+ "loss": 0.7681,
+ "slid_loss": 0.7042,
+ "step": 2955,
+ "time": 14.03
+ },
+ {
+ "epoch": 2.84,
+ "learning_rate": "1.4688e-04",
+ "loss": 0.7484,
+ "slid_loss": 0.7048,
+ "step": 2956,
+ "time": 11.31
+ },
+ {
+ "epoch": 2.84,
+ "learning_rate": "1.4685e-04",
+ "loss": 0.7456,
+ "slid_loss": 0.7047,
+ "step": 2957,
+ "time": 14.19
+ },
+ {
+ "epoch": 2.84,
+ "learning_rate": "1.4682e-04",
+ "loss": 0.7363,
+ "slid_loss": 0.7053,
+ "step": 2958,
+ "time": 13.3
+ },
+ {
+ "epoch": 2.84,
+ "learning_rate": "1.4678e-04",
+ "loss": 0.7108,
+ "slid_loss": 0.7051,
+ "step": 2959,
+ "time": 11.22
+ },
+ {
+ "epoch": 2.84,
+ "learning_rate": "1.4675e-04",
+ "loss": 0.7673,
+ "slid_loss": 0.7053,
+ "step": 2960,
+ "time": 13.81
+ },
+ {
+ "epoch": 2.84,
+ "learning_rate": "1.4672e-04",
+ "loss": 0.7133,
+ "slid_loss": 0.7051,
+ "step": 2961,
+ "time": 13.38
+ },
+ {
+ "epoch": 2.85,
+ "learning_rate": "1.4668e-04",
+ "loss": 0.7739,
+ "slid_loss": 0.7054,
+ "step": 2962,
+ "time": 13.97
+ },
+ {
+ "epoch": 2.85,
+ "learning_rate": "1.4665e-04",
+ "loss": 0.7004,
+ "slid_loss": 0.7054,
+ "step": 2963,
+ "time": 12.96
+ },
+ {
+ "epoch": 2.85,
+ "learning_rate": "1.4662e-04",
+ "loss": 0.756,
+ "slid_loss": 0.7057,
+ "step": 2964,
+ "time": 13.84
+ },
+ {
+ "epoch": 2.85,
+ "learning_rate": "1.4658e-04",
+ "loss": 0.7557,
+ "slid_loss": 0.7064,
+ "step": 2965,
+ "time": 11.82
+ },
+ {
+ "epoch": 2.85,
+ "learning_rate": "1.4655e-04",
+ "loss": 0.6347,
+ "slid_loss": 0.7048,
+ "step": 2966,
+ "time": 13.94
+ },
+ {
+ "epoch": 2.85,
+ "learning_rate": "1.4652e-04",
+ "loss": 0.7663,
+ "slid_loss": 0.7049,
+ "step": 2967,
+ "time": 13.49
+ },
+ {
+ "epoch": 2.85,
+ "learning_rate": "1.4648e-04",
+ "loss": 0.7919,
+ "slid_loss": 0.7065,
+ "step": 2968,
+ "time": 13.15
+ },
+ {
+ "epoch": 2.85,
+ "learning_rate": "1.4645e-04",
+ "loss": 0.7785,
+ "slid_loss": 0.7076,
+ "step": 2969,
+ "time": 13.57
+ },
+ {
+ "epoch": 2.85,
+ "learning_rate": "1.4641e-04",
+ "loss": 0.7689,
+ "slid_loss": 0.7074,
+ "step": 2970,
+ "time": 12.85
+ },
+ {
+ "epoch": 2.85,
+ "learning_rate": "1.4638e-04",
+ "loss": 0.6634,
+ "slid_loss": 0.7067,
+ "step": 2971,
+ "time": 13.23
+ },
+ {
+ "epoch": 2.85,
+ "learning_rate": "1.4635e-04",
+ "loss": 0.7007,
+ "slid_loss": 0.7077,
+ "step": 2972,
+ "time": 13.21
+ },
+ {
+ "epoch": 2.86,
+ "learning_rate": "1.4631e-04",
+ "loss": 0.773,
+ "slid_loss": 0.7087,
+ "step": 2973,
+ "time": 12.84
+ },
+ {
+ "epoch": 2.86,
+ "learning_rate": "1.4628e-04",
+ "loss": 0.6484,
+ "slid_loss": 0.7074,
+ "step": 2974,
+ "time": 13.29
+ },
+ {
+ "epoch": 2.86,
+ "learning_rate": "1.4625e-04",
+ "loss": 0.7372,
+ "slid_loss": 0.7081,
+ "step": 2975,
+ "time": 13.13
+ },
+ {
+ "epoch": 2.86,
+ "learning_rate": "1.4621e-04",
+ "loss": 0.6386,
+ "slid_loss": 0.7071,
+ "step": 2976,
+ "time": 12.9
+ },
+ {
+ "epoch": 2.86,
+ "learning_rate": "1.4618e-04",
+ "loss": 0.7698,
+ "slid_loss": 0.7078,
+ "step": 2977,
+ "time": 13.7
+ },
+ {
+ "epoch": 2.86,
+ "learning_rate": "1.4615e-04",
+ "loss": 0.8024,
+ "slid_loss": 0.7085,
+ "step": 2978,
+ "time": 14.91
+ },
+ {
+ "epoch": 2.86,
+ "learning_rate": "1.4611e-04",
+ "loss": 0.6839,
+ "slid_loss": 0.7081,
+ "step": 2979,
+ "time": 13.16
+ },
+ {
+ "epoch": 2.86,
+ "learning_rate": "1.4608e-04",
+ "loss": 0.7217,
+ "slid_loss": 0.7091,
+ "step": 2980,
+ "time": 13.38
+ },
+ {
+ "epoch": 2.86,
+ "learning_rate": "1.4605e-04",
+ "loss": 0.5628,
+ "slid_loss": 0.7071,
+ "step": 2981,
+ "time": 13.81
+ },
+ {
+ "epoch": 2.86,
+ "learning_rate": "1.4601e-04",
+ "loss": 0.769,
+ "slid_loss": 0.7083,
+ "step": 2982,
+ "time": 11.79
+ },
+ {
+ "epoch": 2.87,
+ "learning_rate": "1.4598e-04",
+ "loss": 0.6551,
+ "slid_loss": 0.7076,
+ "step": 2983,
+ "time": 11.39
+ },
+ {
+ "epoch": 2.87,
+ "learning_rate": "1.4595e-04",
+ "loss": 0.6776,
+ "slid_loss": 0.7077,
+ "step": 2984,
+ "time": 13.07
+ },
+ {
+ "epoch": 2.87,
+ "learning_rate": "1.4591e-04",
+ "loss": 0.6473,
+ "slid_loss": 0.7073,
+ "step": 2985,
+ "time": 13.14
+ },
+ {
+ "epoch": 2.87,
+ "learning_rate": "1.4588e-04",
+ "loss": 0.7464,
+ "slid_loss": 0.7085,
+ "step": 2986,
+ "time": 10.9
+ },
+ {
+ "epoch": 2.87,
+ "learning_rate": "1.4585e-04",
+ "loss": 0.6552,
+ "slid_loss": 0.7074,
+ "step": 2987,
+ "time": 13.55
+ },
+ {
+ "epoch": 2.87,
+ "learning_rate": "1.4581e-04",
+ "loss": 0.6471,
+ "slid_loss": 0.7069,
+ "step": 2988,
+ "time": 14.06
+ },
+ {
+ "epoch": 2.87,
+ "learning_rate": "1.4578e-04",
+ "loss": 0.6549,
+ "slid_loss": 0.7061,
+ "step": 2989,
+ "time": 11.25
+ },
+ {
+ "epoch": 2.87,
+ "learning_rate": "1.4575e-04",
+ "loss": 0.682,
+ "slid_loss": 0.7051,
+ "step": 2990,
+ "time": 13.82
+ },
+ {
+ "epoch": 2.87,
+ "learning_rate": "1.4571e-04",
+ "loss": 0.6921,
+ "slid_loss": 0.7052,
+ "step": 2991,
+ "time": 12.93
+ },
+ {
+ "epoch": 2.87,
+ "learning_rate": "1.4568e-04",
+ "loss": 0.6534,
+ "slid_loss": 0.7051,
+ "step": 2992,
+ "time": 12.91
+ },
+ {
+ "epoch": 2.88,
+ "learning_rate": "1.4565e-04",
+ "loss": 0.7625,
+ "slid_loss": 0.7059,
+ "step": 2993,
+ "time": 13.13
+ },
+ {
+ "epoch": 2.88,
+ "learning_rate": "1.4561e-04",
+ "loss": 0.6895,
+ "slid_loss": 0.7055,
+ "step": 2994,
+ "time": 12.99
+ },
+ {
+ "epoch": 2.88,
+ "learning_rate": "1.4558e-04",
+ "loss": 0.6825,
+ "slid_loss": 0.7057,
+ "step": 2995,
+ "time": 13.93
+ },
+ {
+ "epoch": 2.88,
+ "learning_rate": "1.4555e-04",
+ "loss": 0.7275,
+ "slid_loss": 0.7056,
+ "step": 2996,
+ "time": 13.66
+ },
+ {
+ "epoch": 2.88,
+ "learning_rate": "1.4551e-04",
+ "loss": 0.5816,
+ "slid_loss": 0.7043,
+ "step": 2997,
+ "time": 13.23
+ },
+ {
+ "epoch": 2.88,
+ "learning_rate": "1.4548e-04",
+ "loss": 0.7886,
+ "slid_loss": 0.7052,
+ "step": 2998,
+ "time": 13.51
+ },
+ {
+ "epoch": 2.88,
+ "learning_rate": "1.4545e-04",
+ "loss": 0.7202,
+ "slid_loss": 0.7062,
+ "step": 2999,
+ "time": 12.89
+ },
+ {
+ "epoch": 2.88,
+ "learning_rate": "1.4541e-04",
+ "loss": 0.6905,
+ "slid_loss": 0.7058,
+ "step": 3000,
+ "time": 12.32
+ },
+ {
+ "epoch": 2.88,
+ "learning_rate": "1.4538e-04",
+ "loss": 0.6916,
+ "slid_loss": 0.7066,
+ "step": 3001,
+ "time": 11.26
+ },
+ {
+ "epoch": 2.88,
+ "learning_rate": "1.4535e-04",
+ "loss": 0.6123,
+ "slid_loss": 0.7051,
+ "step": 3002,
+ "time": 11.38
+ },
+ {
+ "epoch": 2.88,
+ "learning_rate": "1.4531e-04",
+ "loss": 0.757,
+ "slid_loss": 0.7057,
+ "step": 3003,
+ "time": 14.12
+ },
+ {
+ "epoch": 2.89,
+ "learning_rate": "1.4528e-04",
+ "loss": 0.7257,
+ "slid_loss": 0.7051,
+ "step": 3004,
+ "time": 13.45
+ },
+ {
+ "epoch": 2.89,
+ "learning_rate": "1.4525e-04",
+ "loss": 0.6034,
+ "slid_loss": 0.7043,
+ "step": 3005,
+ "time": 12.2
+ },
+ {
+ "epoch": 2.89,
+ "learning_rate": "1.4521e-04",
+ "loss": 0.623,
+ "slid_loss": 0.7034,
+ "step": 3006,
+ "time": 13.38
+ },
+ {
+ "epoch": 2.89,
+ "learning_rate": "1.4518e-04",
+ "loss": 0.6806,
+ "slid_loss": 0.7034,
+ "step": 3007,
+ "time": 13.78
+ },
+ {
+ "epoch": 2.89,
+ "learning_rate": "1.4515e-04",
+ "loss": 0.6505,
+ "slid_loss": 0.7031,
+ "step": 3008,
+ "time": 13.22
+ },
+ {
+ "epoch": 2.89,
+ "learning_rate": "1.4511e-04",
+ "loss": 0.6953,
+ "slid_loss": 0.7029,
+ "step": 3009,
+ "time": 12.95
+ },
+ {
+ "epoch": 2.89,
+ "learning_rate": "1.4508e-04",
+ "loss": 0.7343,
+ "slid_loss": 0.7037,
+ "step": 3010,
+ "time": 13.48
+ },
+ {
+ "epoch": 2.89,
+ "learning_rate": "1.4504e-04",
+ "loss": 0.6928,
+ "slid_loss": 0.7041,
+ "step": 3011,
+ "time": 12.96
+ },
+ {
+ "epoch": 2.89,
+ "learning_rate": "1.4501e-04",
+ "loss": 0.6868,
+ "slid_loss": 0.7036,
+ "step": 3012,
+ "time": 13.7
+ },
+ {
+ "epoch": 2.89,
+ "learning_rate": "1.4498e-04",
+ "loss": 0.6916,
+ "slid_loss": 0.7037,
+ "step": 3013,
+ "time": 12.93
+ },
+ {
+ "epoch": 2.9,
+ "learning_rate": "1.4494e-04",
+ "loss": 0.6456,
+ "slid_loss": 0.7033,
+ "step": 3014,
+ "time": 11.38
+ },
+ {
+ "epoch": 2.9,
+ "learning_rate": "1.4491e-04",
+ "loss": 0.7417,
+ "slid_loss": 0.7035,
+ "step": 3015,
+ "time": 13.34
+ },
+ {
+ "epoch": 2.9,
+ "learning_rate": "1.4488e-04",
+ "loss": 0.7384,
+ "slid_loss": 0.7046,
+ "step": 3016,
+ "time": 11.85
+ },
+ {
+ "epoch": 2.9,
+ "learning_rate": "1.4484e-04",
+ "loss": 0.6083,
+ "slid_loss": 0.7038,
+ "step": 3017,
+ "time": 13.66
+ },
+ {
+ "epoch": 2.9,
+ "learning_rate": "1.4481e-04",
+ "loss": 0.6081,
+ "slid_loss": 0.7029,
+ "step": 3018,
+ "time": 13.5
+ },
+ {
+ "epoch": 2.9,
+ "learning_rate": "1.4478e-04",
+ "loss": 0.6343,
+ "slid_loss": 0.7025,
+ "step": 3019,
+ "time": 12.91
+ },
+ {
+ "epoch": 2.9,
+ "learning_rate": "1.4474e-04",
+ "loss": 0.7031,
+ "slid_loss": 0.701,
+ "step": 3020,
+ "time": 13.83
+ },
+ {
+ "epoch": 2.9,
+ "learning_rate": "1.4471e-04",
+ "loss": 0.679,
+ "slid_loss": 0.701,
+ "step": 3021,
+ "time": 13.6
+ },
+ {
+ "epoch": 2.9,
+ "learning_rate": "1.4468e-04",
+ "loss": 0.6341,
+ "slid_loss": 0.6999,
+ "step": 3022,
+ "time": 11.29
+ },
+ {
+ "epoch": 2.9,
+ "learning_rate": "1.4464e-04",
+ "loss": 0.6778,
+ "slid_loss": 0.6988,
+ "step": 3023,
+ "time": 14.48
+ },
+ {
+ "epoch": 2.9,
+ "learning_rate": "1.4461e-04",
+ "loss": 0.7505,
+ "slid_loss": 0.7001,
+ "step": 3024,
+ "time": 13.54
+ },
+ {
+ "epoch": 2.91,
+ "learning_rate": "1.4458e-04",
+ "loss": 0.5979,
+ "slid_loss": 0.6991,
+ "step": 3025,
+ "time": 14.06
+ },
+ {
+ "epoch": 2.91,
+ "learning_rate": "1.4454e-04",
+ "loss": 0.6766,
+ "slid_loss": 0.6986,
+ "step": 3026,
+ "time": 13.25
+ },
+ {
+ "epoch": 2.91,
+ "learning_rate": "1.4451e-04",
+ "loss": 0.5747,
+ "slid_loss": 0.6976,
+ "step": 3027,
+ "time": 10.87
+ },
+ {
+ "epoch": 2.91,
+ "learning_rate": "1.4448e-04",
+ "loss": 0.7425,
+ "slid_loss": 0.6978,
+ "step": 3028,
+ "time": 14.0
+ },
+ {
+ "epoch": 2.91,
+ "learning_rate": "1.4444e-04",
+ "loss": 0.7362,
+ "slid_loss": 0.6979,
+ "step": 3029,
+ "time": 12.77
+ },
+ {
+ "epoch": 2.91,
+ "learning_rate": "1.4441e-04",
+ "loss": 0.7399,
+ "slid_loss": 0.6981,
+ "step": 3030,
+ "time": 13.34
+ },
+ {
+ "epoch": 2.91,
+ "learning_rate": "1.4438e-04",
+ "loss": 0.7381,
+ "slid_loss": 0.6987,
+ "step": 3031,
+ "time": 11.0
+ },
+ {
+ "epoch": 2.91,
+ "learning_rate": "1.4434e-04",
+ "loss": 0.6595,
+ "slid_loss": 0.6974,
+ "step": 3032,
+ "time": 11.44
+ },
+ {
+ "epoch": 2.91,
+ "learning_rate": "1.4431e-04",
+ "loss": 0.6314,
+ "slid_loss": 0.6963,
+ "step": 3033,
+ "time": 13.45
+ },
+ {
+ "epoch": 2.91,
+ "learning_rate": "1.4428e-04",
+ "loss": 0.6767,
+ "slid_loss": 0.6972,
+ "step": 3034,
+ "time": 11.82
+ },
+ {
+ "epoch": 2.92,
+ "learning_rate": "1.4424e-04",
+ "loss": 0.7467,
+ "slid_loss": 0.6972,
+ "step": 3035,
+ "time": 11.56
+ },
+ {
+ "epoch": 2.92,
+ "learning_rate": "1.4421e-04",
+ "loss": 0.5999,
+ "slid_loss": 0.6959,
+ "step": 3036,
+ "time": 13.23
+ },
+ {
+ "epoch": 2.92,
+ "learning_rate": "1.4418e-04",
+ "loss": 0.6967,
+ "slid_loss": 0.6959,
+ "step": 3037,
+ "time": 14.18
+ },
+ {
+ "epoch": 2.92,
+ "learning_rate": "1.4414e-04",
+ "loss": 0.6797,
+ "slid_loss": 0.6959,
+ "step": 3038,
+ "time": 14.02
+ },
+ {
+ "epoch": 2.92,
+ "learning_rate": "1.4411e-04",
+ "loss": 0.6724,
+ "slid_loss": 0.6957,
+ "step": 3039,
+ "time": 14.16
+ },
+ {
+ "epoch": 2.92,
+ "learning_rate": "1.4408e-04",
+ "loss": 0.7039,
+ "slid_loss": 0.6969,
+ "step": 3040,
+ "time": 13.52
+ },
+ {
+ "epoch": 2.92,
+ "learning_rate": "1.4404e-04",
+ "loss": 0.6728,
+ "slid_loss": 0.6963,
+ "step": 3041,
+ "time": 13.23
+ },
+ {
+ "epoch": 2.92,
+ "learning_rate": "1.4401e-04",
+ "loss": 0.7397,
+ "slid_loss": 0.6967,
+ "step": 3042,
+ "time": 11.58
+ },
+ {
+ "epoch": 2.92,
+ "learning_rate": "1.4398e-04",
+ "loss": 0.716,
+ "slid_loss": 0.6968,
+ "step": 3043,
+ "time": 13.87
+ },
+ {
+ "epoch": 2.92,
+ "learning_rate": "1.4394e-04",
+ "loss": 0.6447,
+ "slid_loss": 0.6966,
+ "step": 3044,
+ "time": 11.98
+ },
+ {
+ "epoch": 2.93,
+ "learning_rate": "1.4391e-04",
+ "loss": 0.7546,
+ "slid_loss": 0.6966,
+ "step": 3045,
+ "time": 12.23
+ },
+ {
+ "epoch": 2.93,
+ "learning_rate": "1.4388e-04",
+ "loss": 0.6668,
+ "slid_loss": 0.6969,
+ "step": 3046,
+ "time": 13.85
+ },
+ {
+ "epoch": 2.93,
+ "learning_rate": "1.4385e-04",
+ "loss": 0.7693,
+ "slid_loss": 0.6979,
+ "step": 3047,
+ "time": 13.92
+ },
+ {
+ "epoch": 2.93,
+ "learning_rate": "1.4381e-04",
+ "loss": 0.7233,
+ "slid_loss": 0.6977,
+ "step": 3048,
+ "time": 13.87
+ },
+ {
+ "epoch": 2.93,
+ "learning_rate": "1.4378e-04",
+ "loss": 0.693,
+ "slid_loss": 0.6981,
+ "step": 3049,
+ "time": 12.82
+ },
+ {
+ "epoch": 2.93,
+ "learning_rate": "1.4375e-04",
+ "loss": 0.6997,
+ "slid_loss": 0.6976,
+ "step": 3050,
+ "time": 13.49
+ },
+ {
+ "epoch": 2.93,
+ "learning_rate": "1.4371e-04",
+ "loss": 0.7083,
+ "slid_loss": 0.6969,
+ "step": 3051,
+ "time": 13.55
+ },
+ {
+ "epoch": 2.93,
+ "learning_rate": "1.4368e-04",
+ "loss": 0.5744,
+ "slid_loss": 0.6962,
+ "step": 3052,
+ "time": 14.13
+ },
+ {
+ "epoch": 2.93,
+ "learning_rate": "1.4365e-04",
+ "loss": 0.6806,
+ "slid_loss": 0.6961,
+ "step": 3053,
+ "time": 11.09
+ },
+ {
+ "epoch": 2.93,
+ "learning_rate": "1.4361e-04",
+ "loss": 0.6486,
+ "slid_loss": 0.6957,
+ "step": 3054,
+ "time": 13.72
+ },
+ {
+ "epoch": 2.93,
+ "learning_rate": "1.4358e-04",
+ "loss": 0.7679,
+ "slid_loss": 0.6957,
+ "step": 3055,
+ "time": 13.11
+ },
+ {
+ "epoch": 2.94,
+ "learning_rate": "1.4355e-04",
+ "loss": 0.7074,
+ "slid_loss": 0.6953,
+ "step": 3056,
+ "time": 13.55
+ },
+ {
+ "epoch": 2.94,
+ "learning_rate": "1.4351e-04",
+ "loss": 0.7615,
+ "slid_loss": 0.6955,
+ "step": 3057,
+ "time": 13.86
+ },
+ {
+ "epoch": 2.94,
+ "learning_rate": "1.4348e-04",
+ "loss": 0.707,
+ "slid_loss": 0.6952,
+ "step": 3058,
+ "time": 12.85
+ },
+ {
+ "epoch": 2.94,
+ "learning_rate": "1.4345e-04",
+ "loss": 0.7509,
+ "slid_loss": 0.6956,
+ "step": 3059,
+ "time": 13.2
+ },
+ {
+ "epoch": 2.94,
+ "learning_rate": "1.4341e-04",
+ "loss": 0.6994,
+ "slid_loss": 0.6949,
+ "step": 3060,
+ "time": 13.43
+ },
+ {
+ "epoch": 2.94,
+ "learning_rate": "1.4338e-04",
+ "loss": 0.7304,
+ "slid_loss": 0.6951,
+ "step": 3061,
+ "time": 10.99
+ },
+ {
+ "epoch": 2.94,
+ "learning_rate": "1.4335e-04",
+ "loss": 0.5641,
+ "slid_loss": 0.693,
+ "step": 3062,
+ "time": 12.89
+ },
+ {
+ "epoch": 2.94,
+ "learning_rate": "1.4331e-04",
+ "loss": 0.689,
+ "slid_loss": 0.6928,
+ "step": 3063,
+ "time": 11.85
+ },
+ {
+ "epoch": 2.94,
+ "learning_rate": "1.4328e-04",
+ "loss": 0.7601,
+ "slid_loss": 0.6929,
+ "step": 3064,
+ "time": 11.1
+ },
+ {
+ "epoch": 2.94,
+ "learning_rate": "1.4325e-04",
+ "loss": 0.6955,
+ "slid_loss": 0.6923,
+ "step": 3065,
+ "time": 14.0
+ },
+ {
+ "epoch": 2.95,
+ "learning_rate": "1.4321e-04",
+ "loss": 0.6926,
+ "slid_loss": 0.6929,
+ "step": 3066,
+ "time": 11.91
+ },
+ {
+ "epoch": 2.95,
+ "learning_rate": "1.4318e-04",
+ "loss": 0.7107,
+ "slid_loss": 0.6923,
+ "step": 3067,
+ "time": 13.84
+ },
+ {
+ "epoch": 2.95,
+ "learning_rate": "1.4315e-04",
+ "loss": 0.6372,
+ "slid_loss": 0.6908,
+ "step": 3068,
+ "time": 12.22
+ },
+ {
+ "epoch": 2.95,
+ "learning_rate": "1.4311e-04",
+ "loss": 0.6726,
+ "slid_loss": 0.6897,
+ "step": 3069,
+ "time": 12.02
+ },
+ {
+ "epoch": 2.95,
+ "learning_rate": "1.4308e-04",
+ "loss": 0.7133,
+ "slid_loss": 0.6891,
+ "step": 3070,
+ "time": 13.05
+ },
+ {
+ "epoch": 2.95,
+ "learning_rate": "1.4305e-04",
+ "loss": 0.6641,
+ "slid_loss": 0.6892,
+ "step": 3071,
+ "time": 11.36
+ },
+ {
+ "epoch": 2.95,
+ "learning_rate": "1.4301e-04",
+ "loss": 0.6922,
+ "slid_loss": 0.6891,
+ "step": 3072,
+ "time": 12.07
+ },
+ {
+ "epoch": 2.95,
+ "learning_rate": "1.4298e-04",
+ "loss": 0.6419,
+ "slid_loss": 0.6878,
+ "step": 3073,
+ "time": 13.57
+ },
+ {
+ "epoch": 2.95,
+ "learning_rate": "1.4295e-04",
+ "loss": 0.6794,
+ "slid_loss": 0.6881,
+ "step": 3074,
+ "time": 12.87
+ },
+ {
+ "epoch": 2.95,
+ "learning_rate": "1.4291e-04",
+ "loss": 0.6922,
+ "slid_loss": 0.6876,
+ "step": 3075,
+ "time": 12.87
+ },
+ {
+ "epoch": 2.95,
+ "learning_rate": "1.4288e-04",
+ "loss": 0.7283,
+ "slid_loss": 0.6885,
+ "step": 3076,
+ "time": 13.35
+ },
+ {
+ "epoch": 2.96,
+ "learning_rate": "1.4285e-04",
+ "loss": 0.776,
+ "slid_loss": 0.6886,
+ "step": 3077,
+ "time": 13.71
+ },
+ {
+ "epoch": 2.96,
+ "learning_rate": "1.4281e-04",
+ "loss": 0.7573,
+ "slid_loss": 0.6881,
+ "step": 3078,
+ "time": 13.58
+ },
+ {
+ "epoch": 2.96,
+ "learning_rate": "1.4278e-04",
+ "loss": 0.7384,
+ "slid_loss": 0.6887,
+ "step": 3079,
+ "time": 13.56
+ },
+ {
+ "epoch": 2.96,
+ "learning_rate": "1.4275e-04",
+ "loss": 0.6639,
+ "slid_loss": 0.6881,
+ "step": 3080,
+ "time": 13.4
+ },
+ {
+ "epoch": 2.96,
+ "learning_rate": "1.4272e-04",
+ "loss": 0.7073,
+ "slid_loss": 0.6895,
+ "step": 3081,
+ "time": 13.89
+ },
+ {
+ "epoch": 2.96,
+ "learning_rate": "1.4268e-04",
+ "loss": 0.7512,
+ "slid_loss": 0.6894,
+ "step": 3082,
+ "time": 13.0
+ },
+ {
+ "epoch": 2.96,
+ "learning_rate": "1.4265e-04",
+ "loss": 0.7828,
+ "slid_loss": 0.6906,
+ "step": 3083,
+ "time": 13.89
+ },
+ {
+ "epoch": 2.96,
+ "learning_rate": "1.4262e-04",
+ "loss": 0.7727,
+ "slid_loss": 0.6916,
+ "step": 3084,
+ "time": 13.46
+ },
+ {
+ "epoch": 2.96,
+ "learning_rate": "1.4258e-04",
+ "loss": 0.5772,
+ "slid_loss": 0.6909,
+ "step": 3085,
+ "time": 13.2
+ },
+ {
+ "epoch": 2.96,
+ "learning_rate": "1.4255e-04",
+ "loss": 0.7195,
+ "slid_loss": 0.6906,
+ "step": 3086,
+ "time": 13.68
+ },
+ {
+ "epoch": 2.97,
+ "learning_rate": "1.4252e-04",
+ "loss": 0.6892,
+ "slid_loss": 0.691,
+ "step": 3087,
+ "time": 13.31
+ },
+ {
+ "epoch": 2.97,
+ "learning_rate": "1.4248e-04",
+ "loss": 0.6903,
+ "slid_loss": 0.6914,
+ "step": 3088,
+ "time": 12.95
+ },
+ {
+ "epoch": 2.97,
+ "learning_rate": "1.4245e-04",
+ "loss": 0.7271,
+ "slid_loss": 0.6921,
+ "step": 3089,
+ "time": 13.19
+ },
+ {
+ "epoch": 2.97,
+ "learning_rate": "1.4242e-04",
+ "loss": 0.6685,
+ "slid_loss": 0.692,
+ "step": 3090,
+ "time": 13.24
+ },
+ {
+ "epoch": 2.97,
+ "learning_rate": "1.4238e-04",
+ "loss": 0.7246,
+ "slid_loss": 0.6923,
+ "step": 3091,
+ "time": 13.44
+ },
+ {
+ "epoch": 2.97,
+ "learning_rate": "1.4235e-04",
+ "loss": 0.633,
+ "slid_loss": 0.6921,
+ "step": 3092,
+ "time": 11.55
+ },
+ {
+ "epoch": 2.97,
+ "learning_rate": "1.4232e-04",
+ "loss": 0.7313,
+ "slid_loss": 0.6918,
+ "step": 3093,
+ "time": 13.69
+ },
+ {
+ "epoch": 2.97,
+ "learning_rate": "1.4228e-04",
+ "loss": 0.7078,
+ "slid_loss": 0.692,
+ "step": 3094,
+ "time": 13.64
+ },
+ {
+ "epoch": 2.97,
+ "learning_rate": "1.4225e-04",
+ "loss": 0.7529,
+ "slid_loss": 0.6927,
+ "step": 3095,
+ "time": 13.82
+ },
+ {
+ "epoch": 2.97,
+ "learning_rate": "1.4222e-04",
+ "loss": 0.7222,
+ "slid_loss": 0.6926,
+ "step": 3096,
+ "time": 12.13
+ },
+ {
+ "epoch": 2.98,
+ "learning_rate": "1.4218e-04",
+ "loss": 0.6911,
+ "slid_loss": 0.6937,
+ "step": 3097,
+ "time": 13.81
+ },
+ {
+ "epoch": 2.98,
+ "learning_rate": "1.4215e-04",
+ "loss": 0.6755,
+ "slid_loss": 0.6926,
+ "step": 3098,
+ "time": 11.55
+ },
+ {
+ "epoch": 2.98,
+ "learning_rate": "1.4212e-04",
+ "loss": 0.6705,
+ "slid_loss": 0.6921,
+ "step": 3099,
+ "time": 11.41
+ },
+ {
+ "epoch": 2.98,
+ "learning_rate": "1.4209e-04",
+ "loss": 0.5421,
+ "slid_loss": 0.6906,
+ "step": 3100,
+ "time": 13.28
+ },
+ {
+ "epoch": 2.98,
+ "learning_rate": "1.4205e-04",
+ "loss": 0.7454,
+ "slid_loss": 0.6911,
+ "step": 3101,
+ "time": 13.42
+ },
+ {
+ "epoch": 2.98,
+ "learning_rate": "1.4202e-04",
+ "loss": 0.7021,
+ "slid_loss": 0.692,
+ "step": 3102,
+ "time": 12.71
+ },
+ {
+ "epoch": 2.98,
+ "learning_rate": "1.4199e-04",
+ "loss": 0.6789,
+ "slid_loss": 0.6913,
+ "step": 3103,
+ "time": 13.72
+ },
+ {
+ "epoch": 2.98,
+ "learning_rate": "1.4195e-04",
+ "loss": 0.6407,
+ "slid_loss": 0.6904,
+ "step": 3104,
+ "time": 13.47
+ },
+ {
+ "epoch": 2.98,
+ "learning_rate": "1.4192e-04",
+ "loss": 0.6743,
+ "slid_loss": 0.6911,
+ "step": 3105,
+ "time": 14.58
+ },
+ {
+ "epoch": 2.98,
+ "learning_rate": "1.4189e-04",
+ "loss": 0.8056,
+ "slid_loss": 0.6929,
+ "step": 3106,
+ "time": 12.96
+ },
+ {
+ "epoch": 2.98,
+ "learning_rate": "1.4185e-04",
+ "loss": 0.7951,
+ "slid_loss": 0.6941,
+ "step": 3107,
+ "time": 14.03
+ },
+ {
+ "epoch": 2.99,
+ "learning_rate": "1.4182e-04",
+ "loss": 0.766,
+ "slid_loss": 0.6952,
+ "step": 3108,
+ "time": 12.06
+ },
+ {
+ "epoch": 2.99,
+ "learning_rate": "1.4179e-04",
+ "loss": 0.7617,
+ "slid_loss": 0.6959,
+ "step": 3109,
+ "time": 12.63
+ },
+ {
+ "epoch": 2.99,
+ "learning_rate": "1.4175e-04",
+ "loss": 0.6083,
+ "slid_loss": 0.6947,
+ "step": 3110,
+ "time": 12.61
+ },
+ {
+ "epoch": 2.99,
+ "learning_rate": "1.4172e-04",
+ "loss": 0.6002,
+ "slid_loss": 0.6937,
+ "step": 3111,
+ "time": 13.57
+ },
+ {
+ "epoch": 2.99,
+ "learning_rate": "1.4169e-04",
+ "loss": 0.7745,
+ "slid_loss": 0.6946,
+ "step": 3112,
+ "time": 14.02
+ },
+ {
+ "epoch": 2.99,
+ "learning_rate": "1.4166e-04",
+ "loss": 0.7196,
+ "slid_loss": 0.6949,
+ "step": 3113,
+ "time": 13.4
+ },
+ {
+ "epoch": 2.99,
+ "learning_rate": "1.4162e-04",
+ "loss": 0.6583,
+ "slid_loss": 0.695,
+ "step": 3114,
+ "time": 13.04
+ },
+ {
+ "epoch": 2.99,
+ "learning_rate": "1.4159e-04",
+ "loss": 0.7411,
+ "slid_loss": 0.695,
+ "step": 3115,
+ "time": 11.5
+ },
+ {
+ "epoch": 2.99,
+ "learning_rate": "1.4156e-04",
+ "loss": 0.6555,
+ "slid_loss": 0.6942,
+ "step": 3116,
+ "time": 13.48
+ },
+ {
+ "epoch": 2.99,
+ "learning_rate": "1.4152e-04",
+ "loss": 0.7387,
+ "slid_loss": 0.6955,
+ "step": 3117,
+ "time": 12.82
+ },
+ {
+ "epoch": 3.0,
+ "learning_rate": "1.4149e-04",
+ "loss": 0.6716,
+ "slid_loss": 0.6961,
+ "step": 3118,
+ "time": 13.52
+ },
+ {
+ "epoch": 3.0,
+ "learning_rate": "1.4146e-04",
+ "loss": 0.7415,
+ "slid_loss": 0.6972,
+ "step": 3119,
+ "time": 13.47
+ },
+ {
+ "epoch": 3.0,
+ "learning_rate": "1.4142e-04",
+ "loss": 0.751,
+ "slid_loss": 0.6977,
+ "step": 3120,
+ "time": 13.53
+ },
+ {
+ "epoch": 3.0,
+ "learning_rate": "1.4139e-04",
+ "loss": 0.7142,
+ "slid_loss": 0.698,
+ "step": 3121,
+ "time": 12.21
+ },
+ {
+ "epoch": 3.0,
+ "learning_rate": "1.4136e-04",
+ "loss": 0.6907,
+ "slid_loss": 0.6986,
+ "step": 3122,
+ "time": 13.03
+ },
+ {
+ "epoch": 3.0,
+ "learning_rate": "1.4132e-04",
+ "loss": 0.7822,
+ "slid_loss": 0.6996,
+ "step": 3123,
+ "time": 12.19
+ },
+ {
+ "epoch": 3.0,
+ "learning_rate": "1.4129e-04",
+ "loss": 0.7672,
+ "slid_loss": 0.6998,
+ "step": 3124,
+ "time": 168.7
+ },
+ {
+ "epoch": 3.0,
+ "learning_rate": "1.4126e-04",
+ "loss": 0.7612,
+ "slid_loss": 0.7014,
+ "step": 3125,
+ "time": 12.93
+ },
+ {
+ "epoch": 3.0,
+ "learning_rate": "1.4123e-04",
+ "loss": 0.7416,
+ "slid_loss": 0.7021,
+ "step": 3126,
+ "time": 11.74
+ },
+ {
+ "epoch": 3.0,
+ "learning_rate": "1.4119e-04",
+ "loss": 0.7642,
+ "slid_loss": 0.704,
+ "step": 3127,
+ "time": 13.39
+ },
+ {
+ "epoch": 3.0,
+ "learning_rate": "1.4116e-04",
+ "loss": 0.8066,
+ "slid_loss": 0.7046,
+ "step": 3128,
+ "time": 11.75
+ },
+ {
+ "epoch": 3.01,
+ "learning_rate": "1.4113e-04",
+ "loss": 0.6525,
+ "slid_loss": 0.7038,
+ "step": 3129,
+ "time": 14.44
+ },
+ {
+ "epoch": 3.01,
+ "learning_rate": "1.4109e-04",
+ "loss": 0.7685,
+ "slid_loss": 0.7041,
+ "step": 3130,
+ "time": 13.63
+ },
+ {
+ "epoch": 3.01,
+ "learning_rate": "1.4106e-04",
+ "loss": 0.7013,
+ "slid_loss": 0.7037,
+ "step": 3131,
+ "time": 12.28
+ },
+ {
+ "epoch": 3.01,
+ "learning_rate": "1.4103e-04",
+ "loss": 0.748,
+ "slid_loss": 0.7046,
+ "step": 3132,
+ "time": 13.54
+ },
+ {
+ "epoch": 3.01,
+ "learning_rate": "1.4099e-04",
+ "loss": 0.7398,
+ "slid_loss": 0.7057,
+ "step": 3133,
+ "time": 13.65
+ },
+ {
+ "epoch": 3.01,
+ "learning_rate": "1.4096e-04",
+ "loss": 0.7436,
+ "slid_loss": 0.7063,
+ "step": 3134,
+ "time": 12.95
+ },
+ {
+ "epoch": 3.01,
+ "learning_rate": "1.4093e-04",
+ "loss": 0.5862,
+ "slid_loss": 0.7047,
+ "step": 3135,
+ "time": 12.98
+ },
+ {
+ "epoch": 3.01,
+ "learning_rate": "1.4090e-04",
+ "loss": 0.7214,
+ "slid_loss": 0.7059,
+ "step": 3136,
+ "time": 13.29
+ },
+ {
+ "epoch": 3.01,
+ "learning_rate": "1.4086e-04",
+ "loss": 0.566,
+ "slid_loss": 0.7046,
+ "step": 3137,
+ "time": 13.83
+ },
+ {
+ "epoch": 3.01,
+ "learning_rate": "1.4083e-04",
+ "loss": 0.6743,
+ "slid_loss": 0.7046,
+ "step": 3138,
+ "time": 12.35
+ },
+ {
+ "epoch": 3.02,
+ "learning_rate": "1.4080e-04",
+ "loss": 0.709,
+ "slid_loss": 0.7049,
+ "step": 3139,
+ "time": 13.21
+ },
+ {
+ "epoch": 3.02,
+ "learning_rate": "1.4076e-04",
+ "loss": 0.6561,
+ "slid_loss": 0.7045,
+ "step": 3140,
+ "time": 12.22
+ },
+ {
+ "epoch": 3.02,
+ "learning_rate": "1.4073e-04",
+ "loss": 0.7308,
+ "slid_loss": 0.705,
+ "step": 3141,
+ "time": 11.66
+ },
+ {
+ "epoch": 3.02,
+ "learning_rate": "1.4070e-04",
+ "loss": 0.7066,
+ "slid_loss": 0.7047,
+ "step": 3142,
+ "time": 11.74
+ },
+ {
+ "epoch": 3.02,
+ "learning_rate": "1.4067e-04",
+ "loss": 0.695,
+ "slid_loss": 0.7045,
+ "step": 3143,
+ "time": 11.9
+ },
+ {
+ "epoch": 3.02,
+ "learning_rate": "1.4063e-04",
+ "loss": 0.613,
+ "slid_loss": 0.7042,
+ "step": 3144,
+ "time": 13.97
+ },
+ {
+ "epoch": 3.02,
+ "learning_rate": "1.4060e-04",
+ "loss": 0.6559,
+ "slid_loss": 0.7032,
+ "step": 3145,
+ "time": 13.15
+ },
+ {
+ "epoch": 3.02,
+ "learning_rate": "1.4057e-04",
+ "loss": 0.7764,
+ "slid_loss": 0.7043,
+ "step": 3146,
+ "time": 13.39
+ },
+ {
+ "epoch": 3.02,
+ "learning_rate": "1.4053e-04",
+ "loss": 0.732,
+ "slid_loss": 0.7039,
+ "step": 3147,
+ "time": 13.3
+ },
+ {
+ "epoch": 3.02,
+ "learning_rate": "1.4050e-04",
+ "loss": 0.7285,
+ "slid_loss": 0.704,
+ "step": 3148,
+ "time": 12.72
+ },
+ {
+ "epoch": 3.02,
+ "learning_rate": "1.4047e-04",
+ "loss": 0.8375,
+ "slid_loss": 0.7054,
+ "step": 3149,
+ "time": 13.47
+ },
+ {
+ "epoch": 3.03,
+ "learning_rate": "1.4043e-04",
+ "loss": 0.6455,
+ "slid_loss": 0.7049,
+ "step": 3150,
+ "time": 13.65
+ },
+ {
+ "epoch": 3.03,
+ "learning_rate": "1.4040e-04",
+ "loss": 0.8021,
+ "slid_loss": 0.7058,
+ "step": 3151,
+ "time": 13.55
+ },
+ {
+ "epoch": 3.03,
+ "learning_rate": "1.4037e-04",
+ "loss": 0.722,
+ "slid_loss": 0.7073,
+ "step": 3152,
+ "time": 11.22
+ },
+ {
+ "epoch": 3.03,
+ "learning_rate": "1.4034e-04",
+ "loss": 0.5594,
+ "slid_loss": 0.7061,
+ "step": 3153,
+ "time": 13.23
+ },
+ {
+ "epoch": 3.03,
+ "learning_rate": "1.4030e-04",
+ "loss": 0.6474,
+ "slid_loss": 0.7061,
+ "step": 3154,
+ "time": 12.02
+ },
+ {
+ "epoch": 3.03,
+ "learning_rate": "1.4027e-04",
+ "loss": 0.6189,
+ "slid_loss": 0.7046,
+ "step": 3155,
+ "time": 12.9
+ },
+ {
+ "epoch": 3.03,
+ "learning_rate": "1.4024e-04",
+ "loss": 0.6948,
+ "slid_loss": 0.7045,
+ "step": 3156,
+ "time": 13.25
+ },
+ {
+ "epoch": 3.03,
+ "learning_rate": "1.4020e-04",
+ "loss": 0.7048,
+ "slid_loss": 0.7039,
+ "step": 3157,
+ "time": 11.85
+ },
+ {
+ "epoch": 3.03,
+ "learning_rate": "1.4017e-04",
+ "loss": 0.6745,
+ "slid_loss": 0.7036,
+ "step": 3158,
+ "time": 14.11
+ },
+ {
+ "epoch": 3.03,
+ "learning_rate": "1.4014e-04",
+ "loss": 0.7211,
+ "slid_loss": 0.7033,
+ "step": 3159,
+ "time": 12.43
+ },
+ {
+ "epoch": 3.04,
+ "learning_rate": "1.4011e-04",
+ "loss": 0.7262,
+ "slid_loss": 0.7035,
+ "step": 3160,
+ "time": 13.26
+ },
+ {
+ "epoch": 3.04,
+ "learning_rate": "1.4007e-04",
+ "loss": 0.6036,
+ "slid_loss": 0.7023,
+ "step": 3161,
+ "time": 13.36
+ },
+ {
+ "epoch": 3.04,
+ "learning_rate": "1.4004e-04",
+ "loss": 0.6643,
+ "slid_loss": 0.7033,
+ "step": 3162,
+ "time": 13.19
+ },
+ {
+ "epoch": 3.04,
+ "learning_rate": "1.4001e-04",
+ "loss": 0.7439,
+ "slid_loss": 0.7038,
+ "step": 3163,
+ "time": 11.6
+ },
+ {
+ "epoch": 3.04,
+ "learning_rate": "1.3997e-04",
+ "loss": 0.8062,
+ "slid_loss": 0.7043,
+ "step": 3164,
+ "time": 10.98
+ },
+ {
+ "epoch": 3.04,
+ "learning_rate": "1.3994e-04",
+ "loss": 0.7249,
+ "slid_loss": 0.7046,
+ "step": 3165,
+ "time": 13.07
+ },
+ {
+ "epoch": 3.04,
+ "learning_rate": "1.3991e-04",
+ "loss": 0.6062,
+ "slid_loss": 0.7037,
+ "step": 3166,
+ "time": 13.65
+ },
+ {
+ "epoch": 3.04,
+ "learning_rate": "1.3988e-04",
+ "loss": 0.6054,
+ "slid_loss": 0.7027,
+ "step": 3167,
+ "time": 13.85
+ },
+ {
+ "epoch": 3.04,
+ "learning_rate": "1.3984e-04",
+ "loss": 0.6388,
+ "slid_loss": 0.7027,
+ "step": 3168,
+ "time": 11.46
+ },
+ {
+ "epoch": 3.04,
+ "learning_rate": "1.3981e-04",
+ "loss": 0.8312,
+ "slid_loss": 0.7043,
+ "step": 3169,
+ "time": 13.35
+ },
+ {
+ "epoch": 3.05,
+ "learning_rate": "1.3978e-04",
+ "loss": 0.6952,
+ "slid_loss": 0.7041,
+ "step": 3170,
+ "time": 12.01
+ },
+ {
+ "epoch": 3.05,
+ "learning_rate": "1.3974e-04",
+ "loss": 0.715,
+ "slid_loss": 0.7046,
+ "step": 3171,
+ "time": 14.03
+ },
+ {
+ "epoch": 3.05,
+ "learning_rate": "1.3971e-04",
+ "loss": 0.6922,
+ "slid_loss": 0.7046,
+ "step": 3172,
+ "time": 13.17
+ },
+ {
+ "epoch": 3.05,
+ "learning_rate": "1.3968e-04",
+ "loss": 0.7078,
+ "slid_loss": 0.7052,
+ "step": 3173,
+ "time": 13.28
+ },
+ {
+ "epoch": 3.05,
+ "learning_rate": "1.3965e-04",
+ "loss": 0.66,
+ "slid_loss": 0.7051,
+ "step": 3174,
+ "time": 14.23
+ },
+ {
+ "epoch": 3.05,
+ "learning_rate": "1.3961e-04",
+ "loss": 0.7304,
+ "slid_loss": 0.7054,
+ "step": 3175,
+ "time": 12.97
+ },
+ {
+ "epoch": 3.05,
+ "learning_rate": "1.3958e-04",
+ "loss": 0.6139,
+ "slid_loss": 0.7043,
+ "step": 3176,
+ "time": 13.51
+ },
+ {
+ "epoch": 3.05,
+ "learning_rate": "1.3955e-04",
+ "loss": 0.732,
+ "slid_loss": 0.7038,
+ "step": 3177,
+ "time": 13.54
+ },
+ {
+ "epoch": 3.05,
+ "learning_rate": "1.3951e-04",
+ "loss": 0.6709,
+ "slid_loss": 0.703,
+ "step": 3178,
+ "time": 12.19
+ },
+ {
+ "epoch": 3.05,
+ "learning_rate": "1.3948e-04",
+ "loss": 0.6941,
+ "slid_loss": 0.7025,
+ "step": 3179,
+ "time": 13.19
+ },
+ {
+ "epoch": 3.05,
+ "learning_rate": "1.3945e-04",
+ "loss": 0.6646,
+ "slid_loss": 0.7025,
+ "step": 3180,
+ "time": 12.83
+ },
+ {
+ "epoch": 3.06,
+ "learning_rate": "1.3942e-04",
+ "loss": 0.6924,
+ "slid_loss": 0.7024,
+ "step": 3181,
+ "time": 11.1
+ },
+ {
+ "epoch": 3.06,
+ "learning_rate": "1.3938e-04",
+ "loss": 0.6183,
+ "slid_loss": 0.7011,
+ "step": 3182,
+ "time": 13.67
+ },
+ {
+ "epoch": 3.06,
+ "learning_rate": "1.3935e-04",
+ "loss": 0.7008,
+ "slid_loss": 0.7003,
+ "step": 3183,
+ "time": 13.51
+ },
+ {
+ "epoch": 3.06,
+ "learning_rate": "1.3932e-04",
+ "loss": 0.6288,
+ "slid_loss": 0.6988,
+ "step": 3184,
+ "time": 12.18
+ },
+ {
+ "epoch": 3.06,
+ "learning_rate": "1.3929e-04",
+ "loss": 0.7464,
+ "slid_loss": 0.7005,
+ "step": 3185,
+ "time": 11.66
+ },
+ {
+ "epoch": 3.06,
+ "learning_rate": "1.3925e-04",
+ "loss": 0.8301,
+ "slid_loss": 0.7016,
+ "step": 3186,
+ "time": 12.93
+ },
+ {
+ "epoch": 3.06,
+ "learning_rate": "1.3922e-04",
+ "loss": 0.6097,
+ "slid_loss": 0.7008,
+ "step": 3187,
+ "time": 13.6
+ },
+ {
+ "epoch": 3.06,
+ "learning_rate": "1.3919e-04",
+ "loss": 0.744,
+ "slid_loss": 0.7014,
+ "step": 3188,
+ "time": 12.87
+ },
+ {
+ "epoch": 3.06,
+ "learning_rate": "1.3915e-04",
+ "loss": 0.676,
+ "slid_loss": 0.7008,
+ "step": 3189,
+ "time": 13.48
+ },
+ {
+ "epoch": 3.06,
+ "learning_rate": "1.3912e-04",
+ "loss": 0.6278,
+ "slid_loss": 0.7004,
+ "step": 3190,
+ "time": 13.23
+ },
+ {
+ "epoch": 3.07,
+ "learning_rate": "1.3909e-04",
+ "loss": 0.7287,
+ "slid_loss": 0.7005,
+ "step": 3191,
+ "time": 13.26
+ },
+ {
+ "epoch": 3.07,
+ "learning_rate": "1.3906e-04",
+ "loss": 0.6362,
+ "slid_loss": 0.7005,
+ "step": 3192,
+ "time": 13.71
+ },
+ {
+ "epoch": 3.07,
+ "learning_rate": "1.3902e-04",
+ "loss": 0.6782,
+ "slid_loss": 0.7,
+ "step": 3193,
+ "time": 10.92
+ },
+ {
+ "epoch": 3.07,
+ "learning_rate": "1.3899e-04",
+ "loss": 0.644,
+ "slid_loss": 0.6993,
+ "step": 3194,
+ "time": 13.49
+ },
+ {
+ "epoch": 3.07,
+ "learning_rate": "1.3896e-04",
+ "loss": 0.8051,
+ "slid_loss": 0.6999,
+ "step": 3195,
+ "time": 14.14
+ },
+ {
+ "epoch": 3.07,
+ "learning_rate": "1.3893e-04",
+ "loss": 0.6883,
+ "slid_loss": 0.6995,
+ "step": 3196,
+ "time": 13.72
+ },
+ {
+ "epoch": 3.07,
+ "learning_rate": "1.3889e-04",
+ "loss": 0.6589,
+ "slid_loss": 0.6992,
+ "step": 3197,
+ "time": 13.43
+ },
+ {
+ "epoch": 3.07,
+ "learning_rate": "1.3886e-04",
+ "loss": 0.6556,
+ "slid_loss": 0.699,
+ "step": 3198,
+ "time": 13.38
+ },
+ {
+ "epoch": 3.07,
+ "learning_rate": "1.3883e-04",
+ "loss": 0.6386,
+ "slid_loss": 0.6987,
+ "step": 3199,
+ "time": 13.37
+ },
+ {
+ "epoch": 3.07,
+ "learning_rate": "1.3879e-04",
+ "loss": 0.7216,
+ "slid_loss": 0.7005,
+ "step": 3200,
+ "time": 13.95
+ },
+ {
+ "epoch": 3.07,
+ "learning_rate": "1.3876e-04",
+ "loss": 0.7405,
+ "slid_loss": 0.7004,
+ "step": 3201,
+ "time": 11.28
+ },
+ {
+ "epoch": 3.08,
+ "learning_rate": "1.3873e-04",
+ "loss": 0.6524,
+ "slid_loss": 0.6999,
+ "step": 3202,
+ "time": 13.25
+ },
+ {
+ "epoch": 3.08,
+ "learning_rate": "1.3870e-04",
+ "loss": 0.756,
+ "slid_loss": 0.7007,
+ "step": 3203,
+ "time": 12.21
+ },
+ {
+ "epoch": 3.08,
+ "learning_rate": "1.3866e-04",
+ "loss": 0.6193,
+ "slid_loss": 0.7005,
+ "step": 3204,
+ "time": 13.51
+ },
+ {
+ "epoch": 3.08,
+ "learning_rate": "1.3863e-04",
+ "loss": 0.7209,
+ "slid_loss": 0.701,
+ "step": 3205,
+ "time": 12.93
+ },
+ {
+ "epoch": 3.08,
+ "learning_rate": "1.3860e-04",
+ "loss": 0.6373,
+ "slid_loss": 0.6993,
+ "step": 3206,
+ "time": 13.42
+ },
+ {
+ "epoch": 3.08,
+ "learning_rate": "1.3857e-04",
+ "loss": 0.7251,
+ "slid_loss": 0.6986,
+ "step": 3207,
+ "time": 14.08
+ },
+ {
+ "epoch": 3.08,
+ "learning_rate": "1.3853e-04",
+ "loss": 0.6462,
+ "slid_loss": 0.6974,
+ "step": 3208,
+ "time": 12.82
+ },
+ {
+ "epoch": 3.08,
+ "learning_rate": "1.3850e-04",
+ "loss": 0.5842,
+ "slid_loss": 0.6956,
+ "step": 3209,
+ "time": 13.53
+ },
+ {
+ "epoch": 3.08,
+ "learning_rate": "1.3847e-04",
+ "loss": 0.6969,
+ "slid_loss": 0.6965,
+ "step": 3210,
+ "time": 12.56
+ },
+ {
+ "epoch": 3.08,
+ "learning_rate": "1.3844e-04",
+ "loss": 0.5923,
+ "slid_loss": 0.6964,
+ "step": 3211,
+ "time": 13.91
+ },
+ {
+ "epoch": 3.09,
+ "learning_rate": "1.3840e-04",
+ "loss": 0.7121,
+ "slid_loss": 0.6958,
+ "step": 3212,
+ "time": 11.85
+ },
+ {
+ "epoch": 3.09,
+ "learning_rate": "1.3837e-04",
+ "loss": 0.7137,
+ "slid_loss": 0.6957,
+ "step": 3213,
+ "time": 12.96
+ },
+ {
+ "epoch": 3.09,
+ "learning_rate": "1.3834e-04",
+ "loss": 0.6624,
+ "slid_loss": 0.6958,
+ "step": 3214,
+ "time": 13.06
+ },
+ {
+ "epoch": 3.09,
+ "learning_rate": "1.3830e-04",
+ "loss": 0.8034,
+ "slid_loss": 0.6964,
+ "step": 3215,
+ "time": 13.66
+ },
+ {
+ "epoch": 3.09,
+ "learning_rate": "1.3827e-04",
+ "loss": 0.7099,
+ "slid_loss": 0.6969,
+ "step": 3216,
+ "time": 14.02
+ },
+ {
+ "epoch": 3.09,
+ "learning_rate": "1.3824e-04",
+ "loss": 0.6684,
+ "slid_loss": 0.6962,
+ "step": 3217,
+ "time": 13.78
+ },
+ {
+ "epoch": 3.09,
+ "learning_rate": "1.3821e-04",
+ "loss": 0.674,
+ "slid_loss": 0.6962,
+ "step": 3218,
+ "time": 13.77
+ },
+ {
+ "epoch": 3.09,
+ "learning_rate": "1.3817e-04",
+ "loss": 0.8261,
+ "slid_loss": 0.6971,
+ "step": 3219,
+ "time": 13.55
+ },
+ {
+ "epoch": 3.09,
+ "learning_rate": "1.3814e-04",
+ "loss": 0.6781,
+ "slid_loss": 0.6964,
+ "step": 3220,
+ "time": 13.93
+ },
+ {
+ "epoch": 3.09,
+ "learning_rate": "1.3811e-04",
+ "loss": 0.7043,
+ "slid_loss": 0.6963,
+ "step": 3221,
+ "time": 13.58
+ },
+ {
+ "epoch": 3.1,
+ "learning_rate": "1.3808e-04",
+ "loss": 0.6437,
+ "slid_loss": 0.6958,
+ "step": 3222,
+ "time": 12.67
+ },
+ {
+ "epoch": 3.1,
+ "learning_rate": "1.3804e-04",
+ "loss": 0.6106,
+ "slid_loss": 0.6941,
+ "step": 3223,
+ "time": 13.58
+ },
+ {
+ "epoch": 3.1,
+ "learning_rate": "1.3801e-04",
+ "loss": 0.7133,
+ "slid_loss": 0.6935,
+ "step": 3224,
+ "time": 13.35
+ },
+ {
+ "epoch": 3.1,
+ "learning_rate": "1.3798e-04",
+ "loss": 0.5977,
+ "slid_loss": 0.6919,
+ "step": 3225,
+ "time": 11.18
+ },
+ {
+ "epoch": 3.1,
+ "learning_rate": "1.3795e-04",
+ "loss": 0.6391,
+ "slid_loss": 0.6909,
+ "step": 3226,
+ "time": 11.53
+ },
+ {
+ "epoch": 3.1,
+ "learning_rate": "1.3791e-04",
+ "loss": 0.7556,
+ "slid_loss": 0.6908,
+ "step": 3227,
+ "time": 13.48
+ },
+ {
+ "epoch": 3.1,
+ "learning_rate": "1.3788e-04",
+ "loss": 0.6963,
+ "slid_loss": 0.6897,
+ "step": 3228,
+ "time": 11.68
+ },
+ {
+ "epoch": 3.1,
+ "learning_rate": "1.3785e-04",
+ "loss": 0.6852,
+ "slid_loss": 0.69,
+ "step": 3229,
+ "time": 12.81
+ },
+ {
+ "epoch": 3.1,
+ "learning_rate": "1.3782e-04",
+ "loss": 0.7193,
+ "slid_loss": 0.6895,
+ "step": 3230,
+ "time": 14.07
+ },
+ {
+ "epoch": 3.1,
+ "learning_rate": "1.3778e-04",
+ "loss": 0.6781,
+ "slid_loss": 0.6893,
+ "step": 3231,
+ "time": 11.34
+ },
+ {
+ "epoch": 3.1,
+ "learning_rate": "1.3775e-04",
+ "loss": 0.6312,
+ "slid_loss": 0.6881,
+ "step": 3232,
+ "time": 11.43
+ },
+ {
+ "epoch": 3.11,
+ "learning_rate": "1.3772e-04",
+ "loss": 0.6932,
+ "slid_loss": 0.6877,
+ "step": 3233,
+ "time": 13.98
+ },
+ {
+ "epoch": 3.11,
+ "learning_rate": "1.3769e-04",
+ "loss": 0.7154,
+ "slid_loss": 0.6874,
+ "step": 3234,
+ "time": 13.29
+ },
+ {
+ "epoch": 3.11,
+ "learning_rate": "1.3765e-04",
+ "loss": 0.6217,
+ "slid_loss": 0.6877,
+ "step": 3235,
+ "time": 12.12
+ },
+ {
+ "epoch": 3.11,
+ "learning_rate": "1.3762e-04",
+ "loss": 0.5643,
+ "slid_loss": 0.6862,
+ "step": 3236,
+ "time": 12.86
+ },
+ {
+ "epoch": 3.11,
+ "learning_rate": "1.3759e-04",
+ "loss": 0.6934,
+ "slid_loss": 0.6874,
+ "step": 3237,
+ "time": 13.13
+ },
+ {
+ "epoch": 3.11,
+ "learning_rate": "1.3756e-04",
+ "loss": 0.7757,
+ "slid_loss": 0.6885,
+ "step": 3238,
+ "time": 12.23
+ },
+ {
+ "epoch": 3.11,
+ "learning_rate": "1.3752e-04",
+ "loss": 0.8026,
+ "slid_loss": 0.6894,
+ "step": 3239,
+ "time": 13.43
+ },
+ {
+ "epoch": 3.11,
+ "learning_rate": "1.3749e-04",
+ "loss": 0.7113,
+ "slid_loss": 0.6899,
+ "step": 3240,
+ "time": 13.52
+ },
+ {
+ "epoch": 3.11,
+ "learning_rate": "1.3746e-04",
+ "loss": 0.7167,
+ "slid_loss": 0.6898,
+ "step": 3241,
+ "time": 12.77
+ },
+ {
+ "epoch": 3.11,
+ "learning_rate": "1.3743e-04",
+ "loss": 0.6399,
+ "slid_loss": 0.6891,
+ "step": 3242,
+ "time": 13.68
+ },
+ {
+ "epoch": 3.12,
+ "learning_rate": "1.3739e-04",
+ "loss": 0.5965,
+ "slid_loss": 0.6881,
+ "step": 3243,
+ "time": 13.72
+ },
+ {
+ "epoch": 3.12,
+ "learning_rate": "1.3736e-04",
+ "loss": 0.742,
+ "slid_loss": 0.6894,
+ "step": 3244,
+ "time": 12.9
+ },
+ {
+ "epoch": 3.12,
+ "learning_rate": "1.3733e-04",
+ "loss": 0.6554,
+ "slid_loss": 0.6894,
+ "step": 3245,
+ "time": 12.81
+ },
+ {
+ "epoch": 3.12,
+ "learning_rate": "1.3730e-04",
+ "loss": 0.6197,
+ "slid_loss": 0.6879,
+ "step": 3246,
+ "time": 13.18
+ },
+ {
+ "epoch": 3.12,
+ "learning_rate": "1.3726e-04",
+ "loss": 0.7444,
+ "slid_loss": 0.688,
+ "step": 3247,
+ "time": 11.91
+ },
+ {
+ "epoch": 3.12,
+ "learning_rate": "1.3723e-04",
+ "loss": 0.6823,
+ "slid_loss": 0.6875,
+ "step": 3248,
+ "time": 11.25
+ },
+ {
+ "epoch": 3.12,
+ "learning_rate": "1.3720e-04",
+ "loss": 0.7295,
+ "slid_loss": 0.6864,
+ "step": 3249,
+ "time": 12.23
+ },
+ {
+ "epoch": 3.12,
+ "learning_rate": "1.3717e-04",
+ "loss": 0.698,
+ "slid_loss": 0.687,
+ "step": 3250,
+ "time": 12.9
+ },
+ {
+ "epoch": 3.12,
+ "learning_rate": "1.3713e-04",
+ "loss": 0.6449,
+ "slid_loss": 0.6854,
+ "step": 3251,
+ "time": 13.45
+ },
+ {
+ "epoch": 3.12,
+ "learning_rate": "1.3710e-04",
+ "loss": 0.7466,
+ "slid_loss": 0.6856,
+ "step": 3252,
+ "time": 13.93
+ },
+ {
+ "epoch": 3.12,
+ "learning_rate": "1.3707e-04",
+ "loss": 0.6312,
+ "slid_loss": 0.6864,
+ "step": 3253,
+ "time": 13.57
+ },
+ {
+ "epoch": 3.13,
+ "learning_rate": "1.3704e-04",
+ "loss": 0.6325,
+ "slid_loss": 0.6862,
+ "step": 3254,
+ "time": 13.74
+ },
+ {
+ "epoch": 3.13,
+ "learning_rate": "1.3701e-04",
+ "loss": 0.69,
+ "slid_loss": 0.6869,
+ "step": 3255,
+ "time": 12.23
+ },
+ {
+ "epoch": 3.13,
+ "learning_rate": "1.3697e-04",
+ "loss": 0.6232,
+ "slid_loss": 0.6862,
+ "step": 3256,
+ "time": 14.03
+ },
+ {
+ "epoch": 3.13,
+ "learning_rate": "1.3694e-04",
+ "loss": 0.5775,
+ "slid_loss": 0.6849,
+ "step": 3257,
+ "time": 13.01
+ },
+ {
+ "epoch": 3.13,
+ "learning_rate": "1.3691e-04",
+ "loss": 0.6228,
+ "slid_loss": 0.6844,
+ "step": 3258,
+ "time": 11.4
+ },
+ {
+ "epoch": 3.13,
+ "learning_rate": "1.3688e-04",
+ "loss": 0.6261,
+ "slid_loss": 0.6835,
+ "step": 3259,
+ "time": 11.31
+ },
+ {
+ "epoch": 3.13,
+ "learning_rate": "1.3684e-04",
+ "loss": 0.7373,
+ "slid_loss": 0.6836,
+ "step": 3260,
+ "time": 11.39
+ },
+ {
+ "epoch": 3.13,
+ "learning_rate": "1.3681e-04",
+ "loss": 0.5971,
+ "slid_loss": 0.6835,
+ "step": 3261,
+ "time": 12.83
+ },
+ {
+ "epoch": 3.13,
+ "learning_rate": "1.3678e-04",
+ "loss": 0.7308,
+ "slid_loss": 0.6842,
+ "step": 3262,
+ "time": 12.52
+ },
+ {
+ "epoch": 3.13,
+ "learning_rate": "1.3675e-04",
+ "loss": 0.6147,
+ "slid_loss": 0.6829,
+ "step": 3263,
+ "time": 13.4
+ },
+ {
+ "epoch": 3.14,
+ "learning_rate": "1.3671e-04",
+ "loss": 0.7135,
+ "slid_loss": 0.682,
+ "step": 3264,
+ "time": 14.24
+ },
+ {
+ "epoch": 3.14,
+ "learning_rate": "1.3668e-04",
+ "loss": 0.6215,
+ "slid_loss": 0.6809,
+ "step": 3265,
+ "time": 10.95
+ },
+ {
+ "epoch": 3.14,
+ "learning_rate": "1.3665e-04",
+ "loss": 0.6423,
+ "slid_loss": 0.6813,
+ "step": 3266,
+ "time": 13.0
+ },
+ {
+ "epoch": 3.14,
+ "learning_rate": "1.3662e-04",
+ "loss": 0.8409,
+ "slid_loss": 0.6836,
+ "step": 3267,
+ "time": 13.33
+ },
+ {
+ "epoch": 3.14,
+ "learning_rate": "1.3658e-04",
+ "loss": 0.6828,
+ "slid_loss": 0.6841,
+ "step": 3268,
+ "time": 12.91
+ },
+ {
+ "epoch": 3.14,
+ "learning_rate": "1.3655e-04",
+ "loss": 0.625,
+ "slid_loss": 0.682,
+ "step": 3269,
+ "time": 12.3
+ },
+ {
+ "epoch": 3.14,
+ "learning_rate": "1.3652e-04",
+ "loss": 0.6589,
+ "slid_loss": 0.6817,
+ "step": 3270,
+ "time": 11.44
+ },
+ {
+ "epoch": 3.14,
+ "learning_rate": "1.3649e-04",
+ "loss": 0.6201,
+ "slid_loss": 0.6807,
+ "step": 3271,
+ "time": 13.0
+ },
+ {
+ "epoch": 3.14,
+ "learning_rate": "1.3646e-04",
+ "loss": 0.8047,
+ "slid_loss": 0.6818,
+ "step": 3272,
+ "time": 14.46
+ },
+ {
+ "epoch": 3.14,
+ "learning_rate": "1.3642e-04",
+ "loss": 0.7142,
+ "slid_loss": 0.6819,
+ "step": 3273,
+ "time": 11.07
+ },
+ {
+ "epoch": 3.15,
+ "learning_rate": "1.3639e-04",
+ "loss": 0.7352,
+ "slid_loss": 0.6827,
+ "step": 3274,
+ "time": 11.95
+ },
+ {
+ "epoch": 3.15,
+ "learning_rate": "1.3636e-04",
+ "loss": 0.7001,
+ "slid_loss": 0.6823,
+ "step": 3275,
+ "time": 12.48
+ },
+ {
+ "epoch": 3.15,
+ "learning_rate": "1.3633e-04",
+ "loss": 0.634,
+ "slid_loss": 0.6825,
+ "step": 3276,
+ "time": 13.24
+ },
+ {
+ "epoch": 3.15,
+ "learning_rate": "1.3629e-04",
+ "loss": 0.6638,
+ "slid_loss": 0.6819,
+ "step": 3277,
+ "time": 14.62
+ },
+ {
+ "epoch": 3.15,
+ "learning_rate": "1.3626e-04",
+ "loss": 0.6915,
+ "slid_loss": 0.6821,
+ "step": 3278,
+ "time": 13.28
+ },
+ {
+ "epoch": 3.15,
+ "learning_rate": "1.3623e-04",
+ "loss": 0.7559,
+ "slid_loss": 0.6827,
+ "step": 3279,
+ "time": 13.38
+ },
+ {
+ "epoch": 3.15,
+ "learning_rate": "1.3620e-04",
+ "loss": 0.6621,
+ "slid_loss": 0.6827,
+ "step": 3280,
+ "time": 13.38
+ },
+ {
+ "epoch": 3.15,
+ "learning_rate": "1.3617e-04",
+ "loss": 0.6505,
+ "slid_loss": 0.6822,
+ "step": 3281,
+ "time": 13.9
+ },
+ {
+ "epoch": 3.15,
+ "learning_rate": "1.3613e-04",
+ "loss": 0.5872,
+ "slid_loss": 0.6819,
+ "step": 3282,
+ "time": 13.83
+ },
+ {
+ "epoch": 3.15,
+ "learning_rate": "1.3610e-04",
+ "loss": 0.6235,
+ "slid_loss": 0.6812,
+ "step": 3283,
+ "time": 12.37
+ },
+ {
+ "epoch": 3.15,
+ "learning_rate": "1.3607e-04",
+ "loss": 0.6815,
+ "slid_loss": 0.6817,
+ "step": 3284,
+ "time": 13.39
+ },
+ {
+ "epoch": 3.16,
+ "learning_rate": "1.3604e-04",
+ "loss": 0.5809,
+ "slid_loss": 0.68,
+ "step": 3285,
+ "time": 13.64
+ },
+ {
+ "epoch": 3.16,
+ "learning_rate": "1.3600e-04",
+ "loss": 0.6227,
+ "slid_loss": 0.678,
+ "step": 3286,
+ "time": 12.72
+ },
+ {
+ "epoch": 3.16,
+ "learning_rate": "1.3597e-04",
+ "loss": 0.5773,
+ "slid_loss": 0.6776,
+ "step": 3287,
+ "time": 12.99
+ },
+ {
+ "epoch": 3.16,
+ "learning_rate": "1.3594e-04",
+ "loss": 0.627,
+ "slid_loss": 0.6765,
+ "step": 3288,
+ "time": 11.81
+ },
+ {
+ "epoch": 3.16,
+ "learning_rate": "1.3591e-04",
+ "loss": 0.6733,
+ "slid_loss": 0.6764,
+ "step": 3289,
+ "time": 12.92
+ },
+ {
+ "epoch": 3.16,
+ "learning_rate": "1.3588e-04",
+ "loss": 0.6861,
+ "slid_loss": 0.677,
+ "step": 3290,
+ "time": 11.13
+ },
+ {
+ "epoch": 3.16,
+ "learning_rate": "1.3584e-04",
+ "loss": 0.5781,
+ "slid_loss": 0.6755,
+ "step": 3291,
+ "time": 12.79
+ },
+ {
+ "epoch": 3.16,
+ "learning_rate": "1.3581e-04",
+ "loss": 0.5836,
+ "slid_loss": 0.675,
+ "step": 3292,
+ "time": 13.56
+ },
+ {
+ "epoch": 3.16,
+ "learning_rate": "1.3578e-04",
+ "loss": 0.6979,
+ "slid_loss": 0.6752,
+ "step": 3293,
+ "time": 13.28
+ },
+ {
+ "epoch": 3.16,
+ "learning_rate": "1.3575e-04",
+ "loss": 0.6836,
+ "slid_loss": 0.6756,
+ "step": 3294,
+ "time": 13.72
+ },
+ {
+ "epoch": 3.17,
+ "learning_rate": "1.3571e-04",
+ "loss": 0.7617,
+ "slid_loss": 0.6752,
+ "step": 3295,
+ "time": 12.9
+ },
+ {
+ "epoch": 3.17,
+ "learning_rate": "1.3568e-04",
+ "loss": 0.7569,
+ "slid_loss": 0.6758,
+ "step": 3296,
+ "time": 13.16
+ },
+ {
+ "epoch": 3.17,
+ "learning_rate": "1.3565e-04",
+ "loss": 0.6258,
+ "slid_loss": 0.6755,
+ "step": 3297,
+ "time": 13.68
+ },
+ {
+ "epoch": 3.17,
+ "learning_rate": "1.3562e-04",
+ "loss": 0.6383,
+ "slid_loss": 0.6753,
+ "step": 3298,
+ "time": 12.97
+ },
+ {
+ "epoch": 3.17,
+ "learning_rate": "1.3559e-04",
+ "loss": 0.6902,
+ "slid_loss": 0.6759,
+ "step": 3299,
+ "time": 11.53
+ },
+ {
+ "epoch": 3.17,
+ "learning_rate": "1.3555e-04",
+ "loss": 0.6196,
+ "slid_loss": 0.6748,
+ "step": 3300,
+ "time": 13.25
+ },
+ {
+ "epoch": 3.17,
+ "learning_rate": "1.3552e-04",
+ "loss": 0.7022,
+ "slid_loss": 0.6744,
+ "step": 3301,
+ "time": 13.11
+ },
+ {
+ "epoch": 3.17,
+ "learning_rate": "1.3549e-04",
+ "loss": 0.5931,
+ "slid_loss": 0.6739,
+ "step": 3302,
+ "time": 13.11
+ },
+ {
+ "epoch": 3.17,
+ "learning_rate": "1.3546e-04",
+ "loss": 0.659,
+ "slid_loss": 0.6729,
+ "step": 3303,
+ "time": 13.42
+ },
+ {
+ "epoch": 3.17,
+ "learning_rate": "1.3543e-04",
+ "loss": 0.6057,
+ "slid_loss": 0.6727,
+ "step": 3304,
+ "time": 12.57
+ },
+ {
+ "epoch": 3.17,
+ "learning_rate": "1.3539e-04",
+ "loss": 0.6678,
+ "slid_loss": 0.6722,
+ "step": 3305,
+ "time": 13.28
+ },
+ {
+ "epoch": 3.18,
+ "learning_rate": "1.3536e-04",
+ "loss": 0.6154,
+ "slid_loss": 0.672,
+ "step": 3306,
+ "time": 13.89
+ },
+ {
+ "epoch": 3.18,
+ "learning_rate": "1.3533e-04",
+ "loss": 0.5986,
+ "slid_loss": 0.6707,
+ "step": 3307,
+ "time": 12.89
+ },
+ {
+ "epoch": 3.18,
+ "learning_rate": "1.3530e-04",
+ "loss": 0.5676,
+ "slid_loss": 0.6699,
+ "step": 3308,
+ "time": 13.75
+ },
+ {
+ "epoch": 3.18,
+ "learning_rate": "1.3527e-04",
+ "loss": 0.7246,
+ "slid_loss": 0.6714,
+ "step": 3309,
+ "time": 13.07
+ },
+ {
+ "epoch": 3.18,
+ "learning_rate": "1.3523e-04",
+ "loss": 0.5979,
+ "slid_loss": 0.6704,
+ "step": 3310,
+ "time": 11.04
+ },
+ {
+ "epoch": 3.18,
+ "learning_rate": "1.3520e-04",
+ "loss": 0.6939,
+ "slid_loss": 0.6714,
+ "step": 3311,
+ "time": 12.81
+ },
+ {
+ "epoch": 3.18,
+ "learning_rate": "1.3517e-04",
+ "loss": 0.6469,
+ "slid_loss": 0.6707,
+ "step": 3312,
+ "time": 12.94
+ },
+ {
+ "epoch": 3.18,
+ "learning_rate": "1.3514e-04",
+ "loss": 0.5894,
+ "slid_loss": 0.6695,
+ "step": 3313,
+ "time": 13.91
+ },
+ {
+ "epoch": 3.18,
+ "learning_rate": "1.3511e-04",
+ "loss": 0.7303,
+ "slid_loss": 0.6702,
+ "step": 3314,
+ "time": 12.89
+ },
+ {
+ "epoch": 3.18,
+ "learning_rate": "1.3507e-04",
+ "loss": 0.6274,
+ "slid_loss": 0.6684,
+ "step": 3315,
+ "time": 12.23
+ },
+ {
+ "epoch": 3.19,
+ "learning_rate": "1.3504e-04",
+ "loss": 0.7723,
+ "slid_loss": 0.669,
+ "step": 3316,
+ "time": 10.93
+ },
+ {
+ "epoch": 3.19,
+ "learning_rate": "1.3501e-04",
+ "loss": 0.6318,
+ "slid_loss": 0.6687,
+ "step": 3317,
+ "time": 13.35
+ },
+ {
+ "epoch": 3.19,
+ "learning_rate": "1.3498e-04",
+ "loss": 0.6214,
+ "slid_loss": 0.6681,
+ "step": 3318,
+ "time": 13.14
+ },
+ {
+ "epoch": 3.19,
+ "learning_rate": "1.3495e-04",
+ "loss": 0.5654,
+ "slid_loss": 0.6655,
+ "step": 3319,
+ "time": 13.76
+ },
+ {
+ "epoch": 3.19,
+ "learning_rate": "1.3491e-04",
+ "loss": 0.6817,
+ "slid_loss": 0.6656,
+ "step": 3320,
+ "time": 12.19
+ },
+ {
+ "epoch": 3.19,
+ "learning_rate": "1.3488e-04",
+ "loss": 0.5939,
+ "slid_loss": 0.6645,
+ "step": 3321,
+ "time": 11.69
+ },
+ {
+ "epoch": 3.19,
+ "learning_rate": "1.3485e-04",
+ "loss": 0.6668,
+ "slid_loss": 0.6647,
+ "step": 3322,
+ "time": 12.93
+ },
+ {
+ "epoch": 3.19,
+ "learning_rate": "1.3482e-04",
+ "loss": 0.6362,
+ "slid_loss": 0.6649,
+ "step": 3323,
+ "time": 12.59
+ },
+ {
+ "epoch": 3.19,
+ "learning_rate": "1.3479e-04",
+ "loss": 0.5771,
+ "slid_loss": 0.6636,
+ "step": 3324,
+ "time": 13.9
+ },
+ {
+ "epoch": 3.19,
+ "learning_rate": "1.3475e-04",
+ "loss": 0.6563,
+ "slid_loss": 0.6642,
+ "step": 3325,
+ "time": 13.69
+ },
+ {
+ "epoch": 3.2,
+ "learning_rate": "1.3472e-04",
+ "loss": 0.6076,
+ "slid_loss": 0.6639,
+ "step": 3326,
+ "time": 12.42
+ },
+ {
+ "epoch": 3.2,
+ "learning_rate": "1.3469e-04",
+ "loss": 0.6605,
+ "slid_loss": 0.6629,
+ "step": 3327,
+ "time": 13.22
+ },
+ {
+ "epoch": 3.2,
+ "learning_rate": "1.3466e-04",
+ "loss": 0.5887,
+ "slid_loss": 0.6618,
+ "step": 3328,
+ "time": 11.43
+ },
+ {
+ "epoch": 3.2,
+ "learning_rate": "1.3463e-04",
+ "loss": 0.6595,
+ "slid_loss": 0.6616,
+ "step": 3329,
+ "time": 12.97
+ },
+ {
+ "epoch": 3.2,
+ "learning_rate": "1.3459e-04",
+ "loss": 0.5991,
+ "slid_loss": 0.6604,
+ "step": 3330,
+ "time": 13.11
+ },
+ {
+ "epoch": 3.2,
+ "learning_rate": "1.3456e-04",
+ "loss": 0.6761,
+ "slid_loss": 0.6603,
+ "step": 3331,
+ "time": 12.05
+ },
+ {
+ "epoch": 3.2,
+ "learning_rate": "1.3453e-04",
+ "loss": 0.633,
+ "slid_loss": 0.6604,
+ "step": 3332,
+ "time": 13.38
+ },
+ {
+ "epoch": 3.2,
+ "learning_rate": "1.3450e-04",
+ "loss": 0.7339,
+ "slid_loss": 0.6608,
+ "step": 3333,
+ "time": 11.52
+ },
+ {
+ "epoch": 3.2,
+ "learning_rate": "1.3447e-04",
+ "loss": 0.715,
+ "slid_loss": 0.6608,
+ "step": 3334,
+ "time": 13.79
+ },
+ {
+ "epoch": 3.2,
+ "learning_rate": "1.3443e-04",
+ "loss": 0.7094,
+ "slid_loss": 0.6616,
+ "step": 3335,
+ "time": 13.31
+ },
+ {
+ "epoch": 3.2,
+ "learning_rate": "1.3440e-04",
+ "loss": 0.7156,
+ "slid_loss": 0.6632,
+ "step": 3336,
+ "time": 11.47
+ },
+ {
+ "epoch": 3.21,
+ "learning_rate": "1.3437e-04",
+ "loss": 0.6799,
+ "slid_loss": 0.663,
+ "step": 3337,
+ "time": 12.6
+ },
+ {
+ "epoch": 3.21,
+ "learning_rate": "1.3434e-04",
+ "loss": 0.6772,
+ "slid_loss": 0.662,
+ "step": 3338,
+ "time": 13.31
+ },
+ {
+ "epoch": 3.21,
+ "learning_rate": "1.3431e-04",
+ "loss": 0.6144,
+ "slid_loss": 0.6602,
+ "step": 3339,
+ "time": 13.33
+ },
+ {
+ "epoch": 3.21,
+ "learning_rate": "1.3428e-04",
+ "loss": 0.6166,
+ "slid_loss": 0.6592,
+ "step": 3340,
+ "time": 12.87
+ },
+ {
+ "epoch": 3.21,
+ "learning_rate": "1.3424e-04",
+ "loss": 0.6997,
+ "slid_loss": 0.659,
+ "step": 3341,
+ "time": 12.91
+ },
+ {
+ "epoch": 3.21,
+ "learning_rate": "1.3421e-04",
+ "loss": 0.72,
+ "slid_loss": 0.6598,
+ "step": 3342,
+ "time": 13.49
+ },
+ {
+ "epoch": 3.21,
+ "learning_rate": "1.3418e-04",
+ "loss": 0.638,
+ "slid_loss": 0.6603,
+ "step": 3343,
+ "time": 13.02
+ },
+ {
+ "epoch": 3.21,
+ "learning_rate": "1.3415e-04",
+ "loss": 0.6522,
+ "slid_loss": 0.6594,
+ "step": 3344,
+ "time": 13.8
+ },
+ {
+ "epoch": 3.21,
+ "learning_rate": "1.3412e-04",
+ "loss": 0.6518,
+ "slid_loss": 0.6593,
+ "step": 3345,
+ "time": 12.09
+ },
+ {
+ "epoch": 3.21,
+ "learning_rate": "1.3408e-04",
+ "loss": 0.6996,
+ "slid_loss": 0.6601,
+ "step": 3346,
+ "time": 12.51
+ },
+ {
+ "epoch": 3.22,
+ "learning_rate": "1.3405e-04",
+ "loss": 0.5865,
+ "slid_loss": 0.6585,
+ "step": 3347,
+ "time": 11.63
+ },
+ {
+ "epoch": 3.22,
+ "learning_rate": "1.3402e-04",
+ "loss": 0.6442,
+ "slid_loss": 0.6582,
+ "step": 3348,
+ "time": 11.51
+ },
+ {
+ "epoch": 3.22,
+ "learning_rate": "1.3399e-04",
+ "loss": 0.6179,
+ "slid_loss": 0.657,
+ "step": 3349,
+ "time": 13.42
+ },
+ {
+ "epoch": 3.22,
+ "learning_rate": "1.3396e-04",
+ "loss": 0.6907,
+ "slid_loss": 0.657,
+ "step": 3350,
+ "time": 13.49
+ },
+ {
+ "epoch": 3.22,
+ "learning_rate": "1.3393e-04",
+ "loss": 0.6699,
+ "slid_loss": 0.6572,
+ "step": 3351,
+ "time": 11.63
+ },
+ {
+ "epoch": 3.22,
+ "learning_rate": "1.3389e-04",
+ "loss": 0.6988,
+ "slid_loss": 0.6567,
+ "step": 3352,
+ "time": 12.43
+ },
+ {
+ "epoch": 3.22,
+ "learning_rate": "1.3386e-04",
+ "loss": 0.6495,
+ "slid_loss": 0.6569,
+ "step": 3353,
+ "time": 12.93
+ },
+ {
+ "epoch": 3.22,
+ "learning_rate": "1.3383e-04",
+ "loss": 0.7588,
+ "slid_loss": 0.6582,
+ "step": 3354,
+ "time": 13.75
+ },
+ {
+ "epoch": 3.22,
+ "learning_rate": "1.3380e-04",
+ "loss": 0.7779,
+ "slid_loss": 0.6591,
+ "step": 3355,
+ "time": 14.09
+ },
+ {
+ "epoch": 3.22,
+ "learning_rate": "1.3377e-04",
+ "loss": 0.6539,
+ "slid_loss": 0.6594,
+ "step": 3356,
+ "time": 13.2
+ },
+ {
+ "epoch": 3.22,
+ "learning_rate": "1.3373e-04",
+ "loss": 0.7241,
+ "slid_loss": 0.6608,
+ "step": 3357,
+ "time": 14.49
+ },
+ {
+ "epoch": 3.23,
+ "learning_rate": "1.3370e-04",
+ "loss": 0.5662,
+ "slid_loss": 0.6603,
+ "step": 3358,
+ "time": 11.51
+ },
+ {
+ "epoch": 3.23,
+ "learning_rate": "1.3367e-04",
+ "loss": 0.7151,
+ "slid_loss": 0.6612,
+ "step": 3359,
+ "time": 13.44
+ },
+ {
+ "epoch": 3.23,
+ "learning_rate": "1.3364e-04",
+ "loss": 0.7163,
+ "slid_loss": 0.661,
+ "step": 3360,
+ "time": 13.19
+ },
+ {
+ "epoch": 3.23,
+ "learning_rate": "1.3361e-04",
+ "loss": 0.664,
+ "slid_loss": 0.6616,
+ "step": 3361,
+ "time": 13.9
+ },
+ {
+ "epoch": 3.23,
+ "learning_rate": "1.3358e-04",
+ "loss": 0.5622,
+ "slid_loss": 0.6599,
+ "step": 3362,
+ "time": 11.92
+ },
+ {
+ "epoch": 3.23,
+ "learning_rate": "1.3354e-04",
+ "loss": 0.6787,
+ "slid_loss": 0.6606,
+ "step": 3363,
+ "time": 13.51
+ },
+ {
+ "epoch": 3.23,
+ "learning_rate": "1.3351e-04",
+ "loss": 0.6547,
+ "slid_loss": 0.66,
+ "step": 3364,
+ "time": 11.73
+ },
+ {
+ "epoch": 3.23,
+ "learning_rate": "1.3348e-04",
+ "loss": 0.6794,
+ "slid_loss": 0.6606,
+ "step": 3365,
+ "time": 13.52
+ },
+ {
+ "epoch": 3.23,
+ "learning_rate": "1.3345e-04",
+ "loss": 0.6005,
+ "slid_loss": 0.6602,
+ "step": 3366,
+ "time": 14.05
+ },
+ {
+ "epoch": 3.23,
+ "learning_rate": "1.3342e-04",
+ "loss": 0.6205,
+ "slid_loss": 0.6579,
+ "step": 3367,
+ "time": 14.03
+ },
+ {
+ "epoch": 3.24,
+ "learning_rate": "1.3339e-04",
+ "loss": 0.6591,
+ "slid_loss": 0.6577,
+ "step": 3368,
+ "time": 13.94
+ },
+ {
+ "epoch": 3.24,
+ "learning_rate": "1.3335e-04",
+ "loss": 0.6542,
+ "slid_loss": 0.658,
+ "step": 3369,
+ "time": 12.9
+ },
+ {
+ "epoch": 3.24,
+ "learning_rate": "1.3332e-04",
+ "loss": 0.6032,
+ "slid_loss": 0.6574,
+ "step": 3370,
+ "time": 13.43
+ },
+ {
+ "epoch": 3.24,
+ "learning_rate": "1.3329e-04",
+ "loss": 0.711,
+ "slid_loss": 0.6584,
+ "step": 3371,
+ "time": 13.57
+ },
+ {
+ "epoch": 3.24,
+ "learning_rate": "1.3326e-04",
+ "loss": 0.7046,
+ "slid_loss": 0.6574,
+ "step": 3372,
+ "time": 13.31
+ },
+ {
+ "epoch": 3.24,
+ "learning_rate": "1.3323e-04",
+ "loss": 0.6165,
+ "slid_loss": 0.6564,
+ "step": 3373,
+ "time": 12.79
+ },
+ {
+ "epoch": 3.24,
+ "learning_rate": "1.3320e-04",
+ "loss": 0.6844,
+ "slid_loss": 0.6559,
+ "step": 3374,
+ "time": 13.16
+ },
+ {
+ "epoch": 3.24,
+ "learning_rate": "1.3317e-04",
+ "loss": 0.6786,
+ "slid_loss": 0.6557,
+ "step": 3375,
+ "time": 12.91
+ },
+ {
+ "epoch": 3.24,
+ "learning_rate": "1.3313e-04",
+ "loss": 0.6452,
+ "slid_loss": 0.6558,
+ "step": 3376,
+ "time": 13.3
+ },
+ {
+ "epoch": 3.24,
+ "learning_rate": "1.3310e-04",
+ "loss": 0.7043,
+ "slid_loss": 0.6562,
+ "step": 3377,
+ "time": 12.9
+ },
+ {
+ "epoch": 3.24,
+ "learning_rate": "1.3307e-04",
+ "loss": 0.6314,
+ "slid_loss": 0.6556,
+ "step": 3378,
+ "time": 13.06
+ },
+ {
+ "epoch": 3.25,
+ "learning_rate": "1.3304e-04",
+ "loss": 0.6355,
+ "slid_loss": 0.6544,
+ "step": 3379,
+ "time": 13.28
+ },
+ {
+ "epoch": 3.25,
+ "learning_rate": "1.3301e-04",
+ "loss": 0.6619,
+ "slid_loss": 0.6544,
+ "step": 3380,
+ "time": 14.28
+ },
+ {
+ "epoch": 3.25,
+ "learning_rate": "1.3298e-04",
+ "loss": 0.5561,
+ "slid_loss": 0.6534,
+ "step": 3381,
+ "time": 11.04
+ },
+ {
+ "epoch": 3.25,
+ "learning_rate": "1.3294e-04",
+ "loss": 0.6913,
+ "slid_loss": 0.6545,
+ "step": 3382,
+ "time": 12.01
+ },
+ {
+ "epoch": 3.25,
+ "learning_rate": "1.3291e-04",
+ "loss": 0.6596,
+ "slid_loss": 0.6548,
+ "step": 3383,
+ "time": 12.38
+ },
+ {
+ "epoch": 3.25,
+ "learning_rate": "1.3288e-04",
+ "loss": 0.5446,
+ "slid_loss": 0.6534,
+ "step": 3384,
+ "time": 13.36
+ },
+ {
+ "epoch": 3.25,
+ "learning_rate": "1.3285e-04",
+ "loss": 0.6033,
+ "slid_loss": 0.6537,
+ "step": 3385,
+ "time": 12.03
+ },
+ {
+ "epoch": 3.25,
+ "learning_rate": "1.3282e-04",
+ "loss": 0.7078,
+ "slid_loss": 0.6545,
+ "step": 3386,
+ "time": 12.92
+ },
+ {
+ "epoch": 3.25,
+ "learning_rate": "1.3279e-04",
+ "loss": 0.6779,
+ "slid_loss": 0.6555,
+ "step": 3387,
+ "time": 11.91
+ },
+ {
+ "epoch": 3.25,
+ "learning_rate": "1.3276e-04",
+ "loss": 0.6534,
+ "slid_loss": 0.6558,
+ "step": 3388,
+ "time": 12.87
+ },
+ {
+ "epoch": 3.26,
+ "learning_rate": "1.3272e-04",
+ "loss": 0.6594,
+ "slid_loss": 0.6557,
+ "step": 3389,
+ "time": 13.4
+ },
+ {
+ "epoch": 3.26,
+ "learning_rate": "1.3269e-04",
+ "loss": 0.572,
+ "slid_loss": 0.6545,
+ "step": 3390,
+ "time": 13.63
+ },
+ {
+ "epoch": 3.26,
+ "learning_rate": "1.3266e-04",
+ "loss": 0.7381,
+ "slid_loss": 0.6561,
+ "step": 3391,
+ "time": 13.45
+ },
+ {
+ "epoch": 3.26,
+ "learning_rate": "1.3263e-04",
+ "loss": 0.6987,
+ "slid_loss": 0.6573,
+ "step": 3392,
+ "time": 13.53
+ },
+ {
+ "epoch": 3.26,
+ "learning_rate": "1.3260e-04",
+ "loss": 0.6492,
+ "slid_loss": 0.6568,
+ "step": 3393,
+ "time": 10.99
+ },
+ {
+ "epoch": 3.26,
+ "learning_rate": "1.3257e-04",
+ "loss": 0.6451,
+ "slid_loss": 0.6564,
+ "step": 3394,
+ "time": 13.29
+ },
+ {
+ "epoch": 3.26,
+ "learning_rate": "1.3254e-04",
+ "loss": 0.6647,
+ "slid_loss": 0.6554,
+ "step": 3395,
+ "time": 11.94
+ },
+ {
+ "epoch": 3.26,
+ "learning_rate": "1.3250e-04",
+ "loss": 0.7301,
+ "slid_loss": 0.6552,
+ "step": 3396,
+ "time": 13.44
+ },
+ {
+ "epoch": 3.26,
+ "learning_rate": "1.3247e-04",
+ "loss": 0.6051,
+ "slid_loss": 0.6549,
+ "step": 3397,
+ "time": 13.98
+ },
+ {
+ "epoch": 3.26,
+ "learning_rate": "1.3244e-04",
+ "loss": 0.6691,
+ "slid_loss": 0.6553,
+ "step": 3398,
+ "time": 13.55
+ },
+ {
+ "epoch": 3.27,
+ "learning_rate": "1.3241e-04",
+ "loss": 0.5666,
+ "slid_loss": 0.654,
+ "step": 3399,
+ "time": 13.91
+ },
+ {
+ "epoch": 3.27,
+ "learning_rate": "1.3238e-04",
+ "loss": 0.5507,
+ "slid_loss": 0.6533,
+ "step": 3400,
+ "time": 12.99
+ },
+ {
+ "epoch": 3.27,
+ "learning_rate": "1.3235e-04",
+ "loss": 0.6497,
+ "slid_loss": 0.6528,
+ "step": 3401,
+ "time": 13.54
+ },
+ {
+ "epoch": 3.27,
+ "learning_rate": "1.3232e-04",
+ "loss": 0.6674,
+ "slid_loss": 0.6535,
+ "step": 3402,
+ "time": 12.63
+ },
+ {
+ "epoch": 3.27,
+ "learning_rate": "1.3228e-04",
+ "loss": 0.6314,
+ "slid_loss": 0.6533,
+ "step": 3403,
+ "time": 13.59
+ },
+ {
+ "epoch": 3.27,
+ "learning_rate": "1.3225e-04",
+ "loss": 0.7013,
+ "slid_loss": 0.6542,
+ "step": 3404,
+ "time": 14.79
+ },
+ {
+ "epoch": 3.27,
+ "learning_rate": "1.3222e-04",
+ "loss": 0.5831,
+ "slid_loss": 0.6534,
+ "step": 3405,
+ "time": 13.81
+ },
+ {
+ "epoch": 3.27,
+ "learning_rate": "1.3219e-04",
+ "loss": 0.6826,
+ "slid_loss": 0.6541,
+ "step": 3406,
+ "time": 13.74
+ },
+ {
+ "epoch": 3.27,
+ "learning_rate": "1.3216e-04",
+ "loss": 0.6146,
+ "slid_loss": 0.6542,
+ "step": 3407,
+ "time": 11.79
+ },
+ {
+ "epoch": 3.27,
+ "learning_rate": "1.3213e-04",
+ "loss": 0.6602,
+ "slid_loss": 0.6551,
+ "step": 3408,
+ "time": 13.22
+ },
+ {
+ "epoch": 3.27,
+ "learning_rate": "1.3210e-04",
+ "loss": 0.6843,
+ "slid_loss": 0.6547,
+ "step": 3409,
+ "time": 12.85
+ },
+ {
+ "epoch": 3.28,
+ "learning_rate": "1.3206e-04",
+ "loss": 0.6797,
+ "slid_loss": 0.6556,
+ "step": 3410,
+ "time": 11.3
+ },
+ {
+ "epoch": 3.28,
+ "learning_rate": "1.3203e-04",
+ "loss": 0.5883,
+ "slid_loss": 0.6545,
+ "step": 3411,
+ "time": 12.76
+ },
+ {
+ "epoch": 3.28,
+ "learning_rate": "1.3200e-04",
+ "loss": 0.6742,
+ "slid_loss": 0.6548,
+ "step": 3412,
+ "time": 12.21
+ },
+ {
+ "epoch": 3.28,
+ "learning_rate": "1.3197e-04",
+ "loss": 0.6273,
+ "slid_loss": 0.6551,
+ "step": 3413,
+ "time": 13.54
+ },
+ {
+ "epoch": 3.28,
+ "learning_rate": "1.3194e-04",
+ "loss": 0.6446,
+ "slid_loss": 0.6543,
+ "step": 3414,
+ "time": 13.9
+ },
+ {
+ "epoch": 3.28,
+ "learning_rate": "1.3191e-04",
+ "loss": 0.6479,
+ "slid_loss": 0.6545,
+ "step": 3415,
+ "time": 11.4
+ },
+ {
+ "epoch": 3.28,
+ "learning_rate": "1.3188e-04",
+ "loss": 0.5691,
+ "slid_loss": 0.6525,
+ "step": 3416,
+ "time": 13.04
+ },
+ {
+ "epoch": 3.28,
+ "learning_rate": "1.3185e-04",
+ "loss": 0.6312,
+ "slid_loss": 0.6525,
+ "step": 3417,
+ "time": 12.88
+ },
+ {
+ "epoch": 3.28,
+ "learning_rate": "1.3181e-04",
+ "loss": 0.7112,
+ "slid_loss": 0.6534,
+ "step": 3418,
+ "time": 12.93
+ },
+ {
+ "epoch": 3.28,
+ "learning_rate": "1.3178e-04",
+ "loss": 0.6582,
+ "slid_loss": 0.6543,
+ "step": 3419,
+ "time": 12.97
+ },
+ {
+ "epoch": 3.29,
+ "learning_rate": "1.3175e-04",
+ "loss": 0.6069,
+ "slid_loss": 0.6535,
+ "step": 3420,
+ "time": 11.75
+ },
+ {
+ "epoch": 3.29,
+ "learning_rate": "1.3172e-04",
+ "loss": 0.6481,
+ "slid_loss": 0.6541,
+ "step": 3421,
+ "time": 12.04
+ },
+ {
+ "epoch": 3.29,
+ "learning_rate": "1.3169e-04",
+ "loss": 0.675,
+ "slid_loss": 0.6542,
+ "step": 3422,
+ "time": 12.91
+ },
+ {
+ "epoch": 3.29,
+ "learning_rate": "1.3166e-04",
+ "loss": 0.6892,
+ "slid_loss": 0.6547,
+ "step": 3423,
+ "time": 13.01
+ },
+ {
+ "epoch": 3.29,
+ "learning_rate": "1.3163e-04",
+ "loss": 0.6977,
+ "slid_loss": 0.6559,
+ "step": 3424,
+ "time": 11.8
+ },
+ {
+ "epoch": 3.29,
+ "learning_rate": "1.3160e-04",
+ "loss": 0.6159,
+ "slid_loss": 0.6555,
+ "step": 3425,
+ "time": 14.28
+ },
+ {
+ "epoch": 3.29,
+ "learning_rate": "1.3157e-04",
+ "loss": 0.5919,
+ "slid_loss": 0.6553,
+ "step": 3426,
+ "time": 11.19
+ },
+ {
+ "epoch": 3.29,
+ "learning_rate": "1.3153e-04",
+ "loss": 0.5903,
+ "slid_loss": 0.6546,
+ "step": 3427,
+ "time": 12.0
+ },
+ {
+ "epoch": 3.29,
+ "learning_rate": "1.3150e-04",
+ "loss": 0.6558,
+ "slid_loss": 0.6553,
+ "step": 3428,
+ "time": 13.7
+ },
+ {
+ "epoch": 3.29,
+ "learning_rate": "1.3147e-04",
+ "loss": 0.5916,
+ "slid_loss": 0.6546,
+ "step": 3429,
+ "time": 13.15
+ },
+ {
+ "epoch": 3.29,
+ "learning_rate": "1.3144e-04",
+ "loss": 0.6759,
+ "slid_loss": 0.6554,
+ "step": 3430,
+ "time": 11.11
+ },
+ {
+ "epoch": 3.3,
+ "learning_rate": "1.3141e-04",
+ "loss": 0.677,
+ "slid_loss": 0.6554,
+ "step": 3431,
+ "time": 13.03
+ },
+ {
+ "epoch": 3.3,
+ "learning_rate": "1.3138e-04",
+ "loss": 0.6758,
+ "slid_loss": 0.6558,
+ "step": 3432,
+ "time": 12.06
+ },
+ {
+ "epoch": 3.3,
+ "learning_rate": "1.3135e-04",
+ "loss": 0.6931,
+ "slid_loss": 0.6554,
+ "step": 3433,
+ "time": 12.81
+ },
+ {
+ "epoch": 3.3,
+ "learning_rate": "1.3132e-04",
+ "loss": 0.6447,
+ "slid_loss": 0.6547,
+ "step": 3434,
+ "time": 13.28
+ },
+ {
+ "epoch": 3.3,
+ "learning_rate": "1.3128e-04",
+ "loss": 0.7286,
+ "slid_loss": 0.6549,
+ "step": 3435,
+ "time": 13.47
+ },
+ {
+ "epoch": 3.3,
+ "learning_rate": "1.3125e-04",
+ "loss": 0.5822,
+ "slid_loss": 0.6536,
+ "step": 3436,
+ "time": 13.49
+ },
+ {
+ "epoch": 3.3,
+ "learning_rate": "1.3122e-04",
+ "loss": 0.6934,
+ "slid_loss": 0.6537,
+ "step": 3437,
+ "time": 13.81
+ },
+ {
+ "epoch": 3.3,
+ "learning_rate": "1.3119e-04",
+ "loss": 0.7107,
+ "slid_loss": 0.654,
+ "step": 3438,
+ "time": 11.58
+ },
+ {
+ "epoch": 3.3,
+ "learning_rate": "1.3116e-04",
+ "loss": 0.5944,
+ "slid_loss": 0.6538,
+ "step": 3439,
+ "time": 11.91
+ },
+ {
+ "epoch": 3.3,
+ "learning_rate": "1.3113e-04",
+ "loss": 0.6438,
+ "slid_loss": 0.6541,
+ "step": 3440,
+ "time": 13.4
+ },
+ {
+ "epoch": 3.31,
+ "learning_rate": "1.3110e-04",
+ "loss": 0.6453,
+ "slid_loss": 0.6536,
+ "step": 3441,
+ "time": 13.68
+ },
+ {
+ "epoch": 3.31,
+ "learning_rate": "1.3107e-04",
+ "loss": 0.6844,
+ "slid_loss": 0.6532,
+ "step": 3442,
+ "time": 13.64
+ },
+ {
+ "epoch": 3.31,
+ "learning_rate": "1.3104e-04",
+ "loss": 0.6456,
+ "slid_loss": 0.6533,
+ "step": 3443,
+ "time": 13.66
+ },
+ {
+ "epoch": 3.31,
+ "learning_rate": "1.3101e-04",
+ "loss": 0.6169,
+ "slid_loss": 0.6529,
+ "step": 3444,
+ "time": 13.28
+ },
+ {
+ "epoch": 3.31,
+ "learning_rate": "1.3097e-04",
+ "loss": 0.6461,
+ "slid_loss": 0.6529,
+ "step": 3445,
+ "time": 13.82
+ },
+ {
+ "epoch": 3.31,
+ "learning_rate": "1.3094e-04",
+ "loss": 0.5884,
+ "slid_loss": 0.6518,
+ "step": 3446,
+ "time": 13.13
+ },
+ {
+ "epoch": 3.31,
+ "learning_rate": "1.3091e-04",
+ "loss": 0.5456,
+ "slid_loss": 0.6514,
+ "step": 3447,
+ "time": 13.59
+ },
+ {
+ "epoch": 3.31,
+ "learning_rate": "1.3088e-04",
+ "loss": 0.5895,
+ "slid_loss": 0.6508,
+ "step": 3448,
+ "time": 14.6
+ },
+ {
+ "epoch": 3.31,
+ "learning_rate": "1.3085e-04",
+ "loss": 0.6834,
+ "slid_loss": 0.6515,
+ "step": 3449,
+ "time": 14.53
+ },
+ {
+ "epoch": 3.31,
+ "learning_rate": "1.3082e-04",
+ "loss": 0.6749,
+ "slid_loss": 0.6513,
+ "step": 3450,
+ "time": 13.27
+ },
+ {
+ "epoch": 3.32,
+ "learning_rate": "1.3079e-04",
+ "loss": 0.6424,
+ "slid_loss": 0.651,
+ "step": 3451,
+ "time": 11.83
+ },
+ {
+ "epoch": 3.32,
+ "learning_rate": "1.3076e-04",
+ "loss": 0.6163,
+ "slid_loss": 0.6502,
+ "step": 3452,
+ "time": 12.81
+ },
+ {
+ "epoch": 3.32,
+ "learning_rate": "1.3073e-04",
+ "loss": 0.6891,
+ "slid_loss": 0.6506,
+ "step": 3453,
+ "time": 12.36
+ },
+ {
+ "epoch": 3.32,
+ "learning_rate": "1.3070e-04",
+ "loss": 0.7219,
+ "slid_loss": 0.6502,
+ "step": 3454,
+ "time": 13.8
+ },
+ {
+ "epoch": 3.32,
+ "learning_rate": "1.3066e-04",
+ "loss": 0.5976,
+ "slid_loss": 0.6484,
+ "step": 3455,
+ "time": 12.75
+ },
+ {
+ "epoch": 3.32,
+ "learning_rate": "1.3063e-04",
+ "loss": 0.6415,
+ "slid_loss": 0.6483,
+ "step": 3456,
+ "time": 13.41
+ },
+ {
+ "epoch": 3.32,
+ "learning_rate": "1.3060e-04",
+ "loss": 0.5952,
+ "slid_loss": 0.647,
+ "step": 3457,
+ "time": 12.77
+ },
+ {
+ "epoch": 3.32,
+ "learning_rate": "1.3057e-04",
+ "loss": 0.6398,
+ "slid_loss": 0.6478,
+ "step": 3458,
+ "time": 11.73
+ },
+ {
+ "epoch": 3.32,
+ "learning_rate": "1.3054e-04",
+ "loss": 0.6103,
+ "slid_loss": 0.6467,
+ "step": 3459,
+ "time": 13.61
+ },
+ {
+ "epoch": 3.32,
+ "learning_rate": "1.3051e-04",
+ "loss": 0.7194,
+ "slid_loss": 0.6467,
+ "step": 3460,
+ "time": 13.21
+ },
+ {
+ "epoch": 3.32,
+ "learning_rate": "1.3048e-04",
+ "loss": 0.6607,
+ "slid_loss": 0.6467,
+ "step": 3461,
+ "time": 12.93
+ },
+ {
+ "epoch": 3.33,
+ "learning_rate": "1.3045e-04",
+ "loss": 0.7086,
+ "slid_loss": 0.6482,
+ "step": 3462,
+ "time": 13.78
+ },
+ {
+ "epoch": 3.33,
+ "learning_rate": "1.3042e-04",
+ "loss": 0.6653,
+ "slid_loss": 0.648,
+ "step": 3463,
+ "time": 12.97
+ },
+ {
+ "epoch": 3.33,
+ "learning_rate": "1.3039e-04",
+ "loss": 0.6694,
+ "slid_loss": 0.6482,
+ "step": 3464,
+ "time": 13.54
+ },
+ {
+ "epoch": 3.33,
+ "learning_rate": "1.3036e-04",
+ "loss": 0.6441,
+ "slid_loss": 0.6478,
+ "step": 3465,
+ "time": 11.03
+ },
+ {
+ "epoch": 3.33,
+ "learning_rate": "1.3032e-04",
+ "loss": 0.585,
+ "slid_loss": 0.6477,
+ "step": 3466,
+ "time": 13.75
+ },
+ {
+ "epoch": 3.33,
+ "learning_rate": "1.3029e-04",
+ "loss": 0.6171,
+ "slid_loss": 0.6476,
+ "step": 3467,
+ "time": 13.13
+ },
+ {
+ "epoch": 3.33,
+ "learning_rate": "1.3026e-04",
+ "loss": 0.6303,
+ "slid_loss": 0.6474,
+ "step": 3468,
+ "time": 14.66
+ },
+ {
+ "epoch": 3.33,
+ "learning_rate": "1.3023e-04",
+ "loss": 0.4937,
+ "slid_loss": 0.6458,
+ "step": 3469,
+ "time": 13.61
+ },
+ {
+ "epoch": 3.33,
+ "learning_rate": "1.3020e-04",
+ "loss": 0.6803,
+ "slid_loss": 0.6465,
+ "step": 3470,
+ "time": 13.28
+ },
+ {
+ "epoch": 3.33,
+ "learning_rate": "1.3017e-04",
+ "loss": 0.6014,
+ "slid_loss": 0.6454,
+ "step": 3471,
+ "time": 13.64
+ },
+ {
+ "epoch": 3.34,
+ "learning_rate": "1.3014e-04",
+ "loss": 0.7617,
+ "slid_loss": 0.646,
+ "step": 3472,
+ "time": 12.31
+ },
+ {
+ "epoch": 3.34,
+ "learning_rate": "1.3011e-04",
+ "loss": 0.6268,
+ "slid_loss": 0.6461,
+ "step": 3473,
+ "time": 13.18
+ },
+ {
+ "epoch": 3.34,
+ "learning_rate": "1.3008e-04",
+ "loss": 0.5907,
+ "slid_loss": 0.6452,
+ "step": 3474,
+ "time": 12.21
+ },
+ {
+ "epoch": 3.34,
+ "learning_rate": "1.3005e-04",
+ "loss": 0.5914,
+ "slid_loss": 0.6443,
+ "step": 3475,
+ "time": 13.45
+ },
+ {
+ "epoch": 3.34,
+ "learning_rate": "1.3002e-04",
+ "loss": 0.7348,
+ "slid_loss": 0.6452,
+ "step": 3476,
+ "time": 13.4
+ },
+ {
+ "epoch": 3.34,
+ "learning_rate": "1.2999e-04",
+ "loss": 0.5711,
+ "slid_loss": 0.6439,
+ "step": 3477,
+ "time": 13.59
+ },
+ {
+ "epoch": 3.34,
+ "learning_rate": "1.2996e-04",
+ "loss": 0.6246,
+ "slid_loss": 0.6438,
+ "step": 3478,
+ "time": 13.92
+ },
+ {
+ "epoch": 3.34,
+ "learning_rate": "1.2992e-04",
+ "loss": 0.7258,
+ "slid_loss": 0.6447,
+ "step": 3479,
+ "time": 14.25
+ },
+ {
+ "epoch": 3.34,
+ "learning_rate": "1.2989e-04",
+ "loss": 0.6603,
+ "slid_loss": 0.6447,
+ "step": 3480,
+ "time": 14.25
+ },
+ {
+ "epoch": 3.34,
+ "learning_rate": "1.2986e-04",
+ "loss": 0.5744,
+ "slid_loss": 0.6449,
+ "step": 3481,
+ "time": 14.26
+ },
+ {
+ "epoch": 3.34,
+ "learning_rate": "1.2983e-04",
+ "loss": 0.6549,
+ "slid_loss": 0.6445,
+ "step": 3482,
+ "time": 12.84
+ },
+ {
+ "epoch": 3.35,
+ "learning_rate": "1.2980e-04",
+ "loss": 0.5536,
+ "slid_loss": 0.6434,
+ "step": 3483,
+ "time": 14.12
+ },
+ {
+ "epoch": 3.35,
+ "learning_rate": "1.2977e-04",
+ "loss": 0.7199,
+ "slid_loss": 0.6452,
+ "step": 3484,
+ "time": 13.78
+ },
+ {
+ "epoch": 3.35,
+ "learning_rate": "1.2974e-04",
+ "loss": 0.6321,
+ "slid_loss": 0.6455,
+ "step": 3485,
+ "time": 12.06
+ },
+ {
+ "epoch": 3.35,
+ "learning_rate": "1.2971e-04",
+ "loss": 0.6989,
+ "slid_loss": 0.6454,
+ "step": 3486,
+ "time": 11.6
+ },
+ {
+ "epoch": 3.35,
+ "learning_rate": "1.2968e-04",
+ "loss": 0.6838,
+ "slid_loss": 0.6454,
+ "step": 3487,
+ "time": 12.13
+ },
+ {
+ "epoch": 3.35,
+ "learning_rate": "1.2965e-04",
+ "loss": 0.5594,
+ "slid_loss": 0.6445,
+ "step": 3488,
+ "time": 12.8
+ },
+ {
+ "epoch": 3.35,
+ "learning_rate": "1.2962e-04",
+ "loss": 0.7626,
+ "slid_loss": 0.6455,
+ "step": 3489,
+ "time": 11.77
+ },
+ {
+ "epoch": 3.35,
+ "learning_rate": "1.2959e-04",
+ "loss": 0.7284,
+ "slid_loss": 0.6471,
+ "step": 3490,
+ "time": 13.99
+ },
+ {
+ "epoch": 3.35,
+ "learning_rate": "1.2956e-04",
+ "loss": 0.5864,
+ "slid_loss": 0.6456,
+ "step": 3491,
+ "time": 13.27
+ },
+ {
+ "epoch": 3.35,
+ "learning_rate": "1.2953e-04",
+ "loss": 0.6521,
+ "slid_loss": 0.6451,
+ "step": 3492,
+ "time": 14.04
+ },
+ {
+ "epoch": 3.36,
+ "learning_rate": "1.2950e-04",
+ "loss": 0.6679,
+ "slid_loss": 0.6453,
+ "step": 3493,
+ "time": 14.05
+ },
+ {
+ "epoch": 3.36,
+ "learning_rate": "1.2947e-04",
+ "loss": 0.6603,
+ "slid_loss": 0.6455,
+ "step": 3494,
+ "time": 13.55
+ },
+ {
+ "epoch": 3.36,
+ "learning_rate": "1.2943e-04",
+ "loss": 0.6619,
+ "slid_loss": 0.6454,
+ "step": 3495,
+ "time": 13.17
+ },
+ {
+ "epoch": 3.36,
+ "learning_rate": "1.2940e-04",
+ "loss": 0.6787,
+ "slid_loss": 0.6449,
+ "step": 3496,
+ "time": 13.31
+ },
+ {
+ "epoch": 3.36,
+ "learning_rate": "1.2937e-04",
+ "loss": 0.55,
+ "slid_loss": 0.6444,
+ "step": 3497,
+ "time": 11.81
+ },
+ {
+ "epoch": 3.36,
+ "learning_rate": "1.2934e-04",
+ "loss": 0.6663,
+ "slid_loss": 0.6443,
+ "step": 3498,
+ "time": 13.55
+ },
+ {
+ "epoch": 3.36,
+ "learning_rate": "1.2931e-04",
+ "loss": 0.6003,
+ "slid_loss": 0.6447,
+ "step": 3499,
+ "time": 13.9
+ },
+ {
+ "epoch": 3.36,
+ "learning_rate": "1.2928e-04",
+ "loss": 0.6255,
+ "slid_loss": 0.6454,
+ "step": 3500,
+ "time": 12.19
+ },
+ {
+ "epoch": 3.36,
+ "learning_rate": "1.2925e-04",
+ "loss": 0.7323,
+ "slid_loss": 0.6462,
+ "step": 3501,
+ "time": 12.65
+ },
+ {
+ "epoch": 3.36,
+ "learning_rate": "1.2922e-04",
+ "loss": 0.6841,
+ "slid_loss": 0.6464,
+ "step": 3502,
+ "time": 13.95
+ },
+ {
+ "epoch": 3.37,
+ "learning_rate": "1.2919e-04",
+ "loss": 0.6956,
+ "slid_loss": 0.6471,
+ "step": 3503,
+ "time": 13.21
+ },
+ {
+ "epoch": 3.37,
+ "learning_rate": "1.2916e-04",
+ "loss": 0.5467,
+ "slid_loss": 0.6455,
+ "step": 3504,
+ "time": 13.5
+ },
+ {
+ "epoch": 3.37,
+ "learning_rate": "1.2913e-04",
+ "loss": 0.6758,
+ "slid_loss": 0.6464,
+ "step": 3505,
+ "time": 13.75
+ },
+ {
+ "epoch": 3.37,
+ "learning_rate": "1.2910e-04",
+ "loss": 0.669,
+ "slid_loss": 0.6463,
+ "step": 3506,
+ "time": 13.45
+ },
+ {
+ "epoch": 3.37,
+ "learning_rate": "1.2907e-04",
+ "loss": 0.6283,
+ "slid_loss": 0.6464,
+ "step": 3507,
+ "time": 13.23
+ },
+ {
+ "epoch": 3.37,
+ "learning_rate": "1.2904e-04",
+ "loss": 0.681,
+ "slid_loss": 0.6466,
+ "step": 3508,
+ "time": 11.13
+ },
+ {
+ "epoch": 3.37,
+ "learning_rate": "1.2901e-04",
+ "loss": 0.6405,
+ "slid_loss": 0.6462,
+ "step": 3509,
+ "time": 13.62
+ },
+ {
+ "epoch": 3.37,
+ "learning_rate": "1.2898e-04",
+ "loss": 0.6772,
+ "slid_loss": 0.6462,
+ "step": 3510,
+ "time": 12.84
+ },
+ {
+ "epoch": 3.37,
+ "learning_rate": "1.2895e-04",
+ "loss": 0.688,
+ "slid_loss": 0.6472,
+ "step": 3511,
+ "time": 13.25
+ },
+ {
+ "epoch": 3.37,
+ "learning_rate": "1.2892e-04",
+ "loss": 0.6511,
+ "slid_loss": 0.647,
+ "step": 3512,
+ "time": 12.77
+ },
+ {
+ "epoch": 3.37,
+ "learning_rate": "1.2889e-04",
+ "loss": 0.6843,
+ "slid_loss": 0.6475,
+ "step": 3513,
+ "time": 14.25
+ },
+ {
+ "epoch": 3.38,
+ "learning_rate": "1.2886e-04",
+ "loss": 0.6292,
+ "slid_loss": 0.6474,
+ "step": 3514,
+ "time": 11.59
+ },
+ {
+ "epoch": 3.38,
+ "learning_rate": "1.2883e-04",
+ "loss": 0.6,
+ "slid_loss": 0.6469,
+ "step": 3515,
+ "time": 13.15
+ },
+ {
+ "epoch": 3.38,
+ "learning_rate": "1.2879e-04",
+ "loss": 0.6872,
+ "slid_loss": 0.6481,
+ "step": 3516,
+ "time": 12.88
+ },
+ {
+ "epoch": 3.38,
+ "learning_rate": "1.2876e-04",
+ "loss": 0.58,
+ "slid_loss": 0.6476,
+ "step": 3517,
+ "time": 13.24
+ },
+ {
+ "epoch": 3.38,
+ "learning_rate": "1.2873e-04",
+ "loss": 0.6752,
+ "slid_loss": 0.6472,
+ "step": 3518,
+ "time": 12.87
+ },
+ {
+ "epoch": 3.38,
+ "learning_rate": "1.2870e-04",
+ "loss": 0.628,
+ "slid_loss": 0.6469,
+ "step": 3519,
+ "time": 12.17
+ },
+ {
+ "epoch": 3.38,
+ "learning_rate": "1.2867e-04",
+ "loss": 0.5855,
+ "slid_loss": 0.6467,
+ "step": 3520,
+ "time": 13.97
+ },
+ {
+ "epoch": 3.38,
+ "learning_rate": "1.2864e-04",
+ "loss": 0.6558,
+ "slid_loss": 0.6468,
+ "step": 3521,
+ "time": 13.16
+ },
+ {
+ "epoch": 3.38,
+ "learning_rate": "1.2861e-04",
+ "loss": 0.6584,
+ "slid_loss": 0.6466,
+ "step": 3522,
+ "time": 11.02
+ },
+ {
+ "epoch": 3.38,
+ "learning_rate": "1.2858e-04",
+ "loss": 0.5994,
+ "slid_loss": 0.6457,
+ "step": 3523,
+ "time": 12.43
+ },
+ {
+ "epoch": 3.39,
+ "learning_rate": "1.2855e-04",
+ "loss": 0.7109,
+ "slid_loss": 0.6458,
+ "step": 3524,
+ "time": 11.62
+ },
+ {
+ "epoch": 3.39,
+ "learning_rate": "1.2852e-04",
+ "loss": 0.6493,
+ "slid_loss": 0.6462,
+ "step": 3525,
+ "time": 12.71
+ },
+ {
+ "epoch": 3.39,
+ "learning_rate": "1.2849e-04",
+ "loss": 0.7536,
+ "slid_loss": 0.6478,
+ "step": 3526,
+ "time": 12.29
+ },
+ {
+ "epoch": 3.39,
+ "learning_rate": "1.2846e-04",
+ "loss": 0.5932,
+ "slid_loss": 0.6478,
+ "step": 3527,
+ "time": 13.61
+ },
+ {
+ "epoch": 3.39,
+ "learning_rate": "1.2843e-04",
+ "loss": 0.5796,
+ "slid_loss": 0.647,
+ "step": 3528,
+ "time": 13.47
+ },
+ {
+ "epoch": 3.39,
+ "learning_rate": "1.2840e-04",
+ "loss": 0.6818,
+ "slid_loss": 0.6479,
+ "step": 3529,
+ "time": 11.98
+ },
+ {
+ "epoch": 3.39,
+ "learning_rate": "1.2837e-04",
+ "loss": 0.6215,
+ "slid_loss": 0.6474,
+ "step": 3530,
+ "time": 13.55
+ },
+ {
+ "epoch": 3.39,
+ "learning_rate": "1.2834e-04",
+ "loss": 0.6867,
+ "slid_loss": 0.6475,
+ "step": 3531,
+ "time": 13.39
+ },
+ {
+ "epoch": 3.39,
+ "learning_rate": "1.2831e-04",
+ "loss": 0.6833,
+ "slid_loss": 0.6476,
+ "step": 3532,
+ "time": 13.06
+ },
+ {
+ "epoch": 3.39,
+ "learning_rate": "1.2828e-04",
+ "loss": 0.666,
+ "slid_loss": 0.6473,
+ "step": 3533,
+ "time": 13.91
+ },
+ {
+ "epoch": 3.39,
+ "learning_rate": "1.2825e-04",
+ "loss": 0.6796,
+ "slid_loss": 0.6477,
+ "step": 3534,
+ "time": 11.25
+ },
+ {
+ "epoch": 3.4,
+ "learning_rate": "1.2822e-04",
+ "loss": 0.6454,
+ "slid_loss": 0.6468,
+ "step": 3535,
+ "time": 11.66
+ },
+ {
+ "epoch": 3.4,
+ "learning_rate": "1.2819e-04",
+ "loss": 0.6689,
+ "slid_loss": 0.6477,
+ "step": 3536,
+ "time": 11.97
+ },
+ {
+ "epoch": 3.4,
+ "learning_rate": "1.2816e-04",
+ "loss": 0.6349,
+ "slid_loss": 0.6471,
+ "step": 3537,
+ "time": 13.73
+ },
+ {
+ "epoch": 3.4,
+ "learning_rate": "1.2813e-04",
+ "loss": 0.6409,
+ "slid_loss": 0.6464,
+ "step": 3538,
+ "time": 12.1
+ },
+ {
+ "epoch": 3.4,
+ "learning_rate": "1.2810e-04",
+ "loss": 0.5303,
+ "slid_loss": 0.6458,
+ "step": 3539,
+ "time": 12.94
+ },
+ {
+ "epoch": 3.4,
+ "learning_rate": "1.2807e-04",
+ "loss": 0.6753,
+ "slid_loss": 0.6461,
+ "step": 3540,
+ "time": 13.9
+ },
+ {
+ "epoch": 3.4,
+ "learning_rate": "1.2804e-04",
+ "loss": 0.5645,
+ "slid_loss": 0.6453,
+ "step": 3541,
+ "time": 12.02
+ },
+ {
+ "epoch": 3.4,
+ "learning_rate": "1.2801e-04",
+ "loss": 0.6181,
+ "slid_loss": 0.6446,
+ "step": 3542,
+ "time": 13.41
+ },
+ {
+ "epoch": 3.4,
+ "learning_rate": "1.2798e-04",
+ "loss": 0.646,
+ "slid_loss": 0.6446,
+ "step": 3543,
+ "time": 13.74
+ },
+ {
+ "epoch": 3.4,
+ "learning_rate": "1.2795e-04",
+ "loss": 0.7002,
+ "slid_loss": 0.6454,
+ "step": 3544,
+ "time": 13.46
+ },
+ {
+ "epoch": 3.41,
+ "learning_rate": "1.2792e-04",
+ "loss": 0.553,
+ "slid_loss": 0.6445,
+ "step": 3545,
+ "time": 14.01
+ },
+ {
+ "epoch": 3.41,
+ "learning_rate": "1.2789e-04",
+ "loss": 0.559,
+ "slid_loss": 0.6442,
+ "step": 3546,
+ "time": 12.87
+ },
+ {
+ "epoch": 3.41,
+ "learning_rate": "1.2786e-04",
+ "loss": 0.6336,
+ "slid_loss": 0.6451,
+ "step": 3547,
+ "time": 13.24
+ },
+ {
+ "epoch": 3.41,
+ "learning_rate": "1.2783e-04",
+ "loss": 0.6509,
+ "slid_loss": 0.6457,
+ "step": 3548,
+ "time": 13.55
+ },
+ {
+ "epoch": 3.41,
+ "learning_rate": "1.2780e-04",
+ "loss": 0.5755,
+ "slid_loss": 0.6446,
+ "step": 3549,
+ "time": 13.39
+ },
+ {
+ "epoch": 3.41,
+ "learning_rate": "1.2777e-04",
+ "loss": 0.6299,
+ "slid_loss": 0.6442,
+ "step": 3550,
+ "time": 14.23
+ },
+ {
+ "epoch": 3.41,
+ "learning_rate": "1.2774e-04",
+ "loss": 0.6328,
+ "slid_loss": 0.6441,
+ "step": 3551,
+ "time": 12.79
+ },
+ {
+ "epoch": 3.41,
+ "learning_rate": "1.2771e-04",
+ "loss": 0.652,
+ "slid_loss": 0.6444,
+ "step": 3552,
+ "time": 12.41
+ },
+ {
+ "epoch": 3.41,
+ "learning_rate": "1.2768e-04",
+ "loss": 0.5931,
+ "slid_loss": 0.6435,
+ "step": 3553,
+ "time": 11.94
+ },
+ {
+ "epoch": 3.41,
+ "learning_rate": "1.2765e-04",
+ "loss": 0.6126,
+ "slid_loss": 0.6424,
+ "step": 3554,
+ "time": 12.12
+ },
+ {
+ "epoch": 3.41,
+ "learning_rate": "1.2762e-04",
+ "loss": 0.6667,
+ "slid_loss": 0.6431,
+ "step": 3555,
+ "time": 13.81
+ },
+ {
+ "epoch": 3.42,
+ "learning_rate": "1.2759e-04",
+ "loss": 0.547,
+ "slid_loss": 0.6421,
+ "step": 3556,
+ "time": 12.46
+ },
+ {
+ "epoch": 3.42,
+ "learning_rate": "1.2756e-04",
+ "loss": 0.5948,
+ "slid_loss": 0.6421,
+ "step": 3557,
+ "time": 13.03
+ },
+ {
+ "epoch": 3.42,
+ "learning_rate": "1.2753e-04",
+ "loss": 0.5367,
+ "slid_loss": 0.6411,
+ "step": 3558,
+ "time": 13.26
+ },
+ {
+ "epoch": 3.42,
+ "learning_rate": "1.2750e-04",
+ "loss": 0.6905,
+ "slid_loss": 0.6419,
+ "step": 3559,
+ "time": 14.3
+ },
+ {
+ "epoch": 3.42,
+ "learning_rate": "1.2747e-04",
+ "loss": 0.6102,
+ "slid_loss": 0.6408,
+ "step": 3560,
+ "time": 13.41
+ },
+ {
+ "epoch": 3.42,
+ "learning_rate": "1.2744e-04",
+ "loss": 0.641,
+ "slid_loss": 0.6406,
+ "step": 3561,
+ "time": 12.96
+ },
+ {
+ "epoch": 3.42,
+ "learning_rate": "1.2741e-04",
+ "loss": 0.7308,
+ "slid_loss": 0.6408,
+ "step": 3562,
+ "time": 13.07
+ },
+ {
+ "epoch": 3.42,
+ "learning_rate": "1.2738e-04",
+ "loss": 0.6055,
+ "slid_loss": 0.6402,
+ "step": 3563,
+ "time": 13.41
+ },
+ {
+ "epoch": 3.42,
+ "learning_rate": "1.2735e-04",
+ "loss": 0.6004,
+ "slid_loss": 0.6395,
+ "step": 3564,
+ "time": 13.28
+ },
+ {
+ "epoch": 3.42,
+ "learning_rate": "1.2732e-04",
+ "loss": 0.6324,
+ "slid_loss": 0.6394,
+ "step": 3565,
+ "time": 13.25
+ },
+ {
+ "epoch": 3.43,
+ "learning_rate": "1.2729e-04",
+ "loss": 0.7346,
+ "slid_loss": 0.6409,
+ "step": 3566,
+ "time": 13.35
+ },
+ {
+ "epoch": 3.43,
+ "learning_rate": "1.2726e-04",
+ "loss": 0.5746,
+ "slid_loss": 0.6405,
+ "step": 3567,
+ "time": 13.3
+ },
+ {
+ "epoch": 3.43,
+ "learning_rate": "1.2723e-04",
+ "loss": 0.683,
+ "slid_loss": 0.641,
+ "step": 3568,
+ "time": 13.18
+ },
+ {
+ "epoch": 3.43,
+ "learning_rate": "1.2720e-04",
+ "loss": 0.7055,
+ "slid_loss": 0.6431,
+ "step": 3569,
+ "time": 13.3
+ },
+ {
+ "epoch": 3.43,
+ "learning_rate": "1.2717e-04",
+ "loss": 0.5434,
+ "slid_loss": 0.6418,
+ "step": 3570,
+ "time": 13.54
+ },
+ {
+ "epoch": 3.43,
+ "learning_rate": "1.2714e-04",
+ "loss": 0.7103,
+ "slid_loss": 0.6429,
+ "step": 3571,
+ "time": 13.42
+ },
+ {
+ "epoch": 3.43,
+ "learning_rate": "1.2711e-04",
+ "loss": 0.6043,
+ "slid_loss": 0.6413,
+ "step": 3572,
+ "time": 10.97
+ },
+ {
+ "epoch": 3.43,
+ "learning_rate": "1.2708e-04",
+ "loss": 0.5866,
+ "slid_loss": 0.6409,
+ "step": 3573,
+ "time": 14.03
+ },
+ {
+ "epoch": 3.43,
+ "learning_rate": "1.2705e-04",
+ "loss": 0.6503,
+ "slid_loss": 0.6415,
+ "step": 3574,
+ "time": 12.86
+ },
+ {
+ "epoch": 3.43,
+ "learning_rate": "1.2702e-04",
+ "loss": 0.6379,
+ "slid_loss": 0.642,
+ "step": 3575,
+ "time": 13.66
+ },
+ {
+ "epoch": 3.44,
+ "learning_rate": "1.2699e-04",
+ "loss": 0.6156,
+ "slid_loss": 0.6408,
+ "step": 3576,
+ "time": 12.8
+ },
+ {
+ "epoch": 3.44,
+ "learning_rate": "1.2696e-04",
+ "loss": 0.6556,
+ "slid_loss": 0.6416,
+ "step": 3577,
+ "time": 11.37
+ },
+ {
+ "epoch": 3.44,
+ "learning_rate": "1.2693e-04",
+ "loss": 0.5855,
+ "slid_loss": 0.6412,
+ "step": 3578,
+ "time": 13.44
+ },
+ {
+ "epoch": 3.44,
+ "learning_rate": "1.2690e-04",
+ "loss": 0.5914,
+ "slid_loss": 0.6399,
+ "step": 3579,
+ "time": 12.26
+ },
+ {
+ "epoch": 3.44,
+ "learning_rate": "1.2687e-04",
+ "loss": 0.7214,
+ "slid_loss": 0.6405,
+ "step": 3580,
+ "time": 13.29
+ },
+ {
+ "epoch": 3.44,
+ "learning_rate": "1.2684e-04",
+ "loss": 0.6035,
+ "slid_loss": 0.6408,
+ "step": 3581,
+ "time": 11.65
+ },
+ {
+ "epoch": 3.44,
+ "learning_rate": "1.2681e-04",
+ "loss": 0.6409,
+ "slid_loss": 0.6406,
+ "step": 3582,
+ "time": 12.9
+ },
+ {
+ "epoch": 3.44,
+ "learning_rate": "1.2678e-04",
+ "loss": 0.6081,
+ "slid_loss": 0.6412,
+ "step": 3583,
+ "time": 13.67
+ },
+ {
+ "epoch": 3.44,
+ "learning_rate": "1.2675e-04",
+ "loss": 0.6786,
+ "slid_loss": 0.6408,
+ "step": 3584,
+ "time": 13.15
+ },
+ {
+ "epoch": 3.44,
+ "learning_rate": "1.2672e-04",
+ "loss": 0.6357,
+ "slid_loss": 0.6408,
+ "step": 3585,
+ "time": 13.39
+ },
+ {
+ "epoch": 3.44,
+ "learning_rate": "1.2669e-04",
+ "loss": 0.7018,
+ "slid_loss": 0.6408,
+ "step": 3586,
+ "time": 14.03
+ },
+ {
+ "epoch": 3.45,
+ "learning_rate": "1.2666e-04",
+ "loss": 0.5928,
+ "slid_loss": 0.6399,
+ "step": 3587,
+ "time": 12.91
+ },
+ {
+ "epoch": 3.45,
+ "learning_rate": "1.2663e-04",
+ "loss": 0.6409,
+ "slid_loss": 0.6407,
+ "step": 3588,
+ "time": 14.53
+ },
+ {
+ "epoch": 3.45,
+ "learning_rate": "1.2660e-04",
+ "loss": 0.5577,
+ "slid_loss": 0.6387,
+ "step": 3589,
+ "time": 12.78
+ },
+ {
+ "epoch": 3.45,
+ "learning_rate": "1.2657e-04",
+ "loss": 0.6085,
+ "slid_loss": 0.6375,
+ "step": 3590,
+ "time": 13.24
+ },
+ {
+ "epoch": 3.45,
+ "learning_rate": "1.2654e-04",
+ "loss": 0.6134,
+ "slid_loss": 0.6378,
+ "step": 3591,
+ "time": 13.89
+ },
+ {
+ "epoch": 3.45,
+ "learning_rate": "1.2652e-04",
+ "loss": 0.6011,
+ "slid_loss": 0.6372,
+ "step": 3592,
+ "time": 12.05
+ },
+ {
+ "epoch": 3.45,
+ "learning_rate": "1.2649e-04",
+ "loss": 0.6214,
+ "slid_loss": 0.6368,
+ "step": 3593,
+ "time": 11.53
+ },
+ {
+ "epoch": 3.45,
+ "learning_rate": "1.2646e-04",
+ "loss": 0.6803,
+ "slid_loss": 0.637,
+ "step": 3594,
+ "time": 12.28
+ },
+ {
+ "epoch": 3.45,
+ "learning_rate": "1.2643e-04",
+ "loss": 0.5855,
+ "slid_loss": 0.6362,
+ "step": 3595,
+ "time": 12.9
+ },
+ {
+ "epoch": 3.45,
+ "learning_rate": "1.2640e-04",
+ "loss": 0.6124,
+ "slid_loss": 0.6356,
+ "step": 3596,
+ "time": 13.04
+ },
+ {
+ "epoch": 3.46,
+ "learning_rate": "1.2637e-04",
+ "loss": 0.5724,
+ "slid_loss": 0.6358,
+ "step": 3597,
+ "time": 13.99
+ },
+ {
+ "epoch": 3.46,
+ "learning_rate": "1.2634e-04",
+ "loss": 0.6265,
+ "slid_loss": 0.6354,
+ "step": 3598,
+ "time": 12.68
+ },
+ {
+ "epoch": 3.46,
+ "learning_rate": "1.2631e-04",
+ "loss": 0.674,
+ "slid_loss": 0.6361,
+ "step": 3599,
+ "time": 13.8
+ },
+ {
+ "epoch": 3.46,
+ "learning_rate": "1.2628e-04",
+ "loss": 0.6898,
+ "slid_loss": 0.6368,
+ "step": 3600,
+ "time": 11.52
+ },
+ {
+ "epoch": 3.46,
+ "learning_rate": "1.2625e-04",
+ "loss": 0.6118,
+ "slid_loss": 0.6356,
+ "step": 3601,
+ "time": 13.38
+ },
+ {
+ "epoch": 3.46,
+ "learning_rate": "1.2622e-04",
+ "loss": 0.5733,
+ "slid_loss": 0.6344,
+ "step": 3602,
+ "time": 11.86
+ },
+ {
+ "epoch": 3.46,
+ "learning_rate": "1.2619e-04",
+ "loss": 0.5743,
+ "slid_loss": 0.6332,
+ "step": 3603,
+ "time": 13.39
+ },
+ {
+ "epoch": 3.46,
+ "learning_rate": "1.2616e-04",
+ "loss": 0.6987,
+ "slid_loss": 0.6348,
+ "step": 3604,
+ "time": 13.15
+ },
+ {
+ "epoch": 3.46,
+ "learning_rate": "1.2613e-04",
+ "loss": 0.6988,
+ "slid_loss": 0.635,
+ "step": 3605,
+ "time": 13.96
+ },
+ {
+ "epoch": 3.46,
+ "learning_rate": "1.2610e-04",
+ "loss": 0.6629,
+ "slid_loss": 0.6349,
+ "step": 3606,
+ "time": 12.89
+ },
+ {
+ "epoch": 3.46,
+ "learning_rate": "1.2607e-04",
+ "loss": 0.6779,
+ "slid_loss": 0.6354,
+ "step": 3607,
+ "time": 14.14
+ },
+ {
+ "epoch": 3.47,
+ "learning_rate": "1.2604e-04",
+ "loss": 0.6764,
+ "slid_loss": 0.6354,
+ "step": 3608,
+ "time": 13.97
+ },
+ {
+ "epoch": 3.47,
+ "learning_rate": "1.2601e-04",
+ "loss": 0.6801,
+ "slid_loss": 0.6358,
+ "step": 3609,
+ "time": 12.98
+ },
+ {
+ "epoch": 3.47,
+ "learning_rate": "1.2598e-04",
+ "loss": 0.6252,
+ "slid_loss": 0.6353,
+ "step": 3610,
+ "time": 13.15
+ },
+ {
+ "epoch": 3.47,
+ "learning_rate": "1.2595e-04",
+ "loss": 0.7153,
+ "slid_loss": 0.6355,
+ "step": 3611,
+ "time": 13.93
+ },
+ {
+ "epoch": 3.47,
+ "learning_rate": "1.2593e-04",
+ "loss": 0.6147,
+ "slid_loss": 0.6352,
+ "step": 3612,
+ "time": 13.81
+ },
+ {
+ "epoch": 3.47,
+ "learning_rate": "1.2590e-04",
+ "loss": 0.6994,
+ "slid_loss": 0.6353,
+ "step": 3613,
+ "time": 13.22
+ },
+ {
+ "epoch": 3.47,
+ "learning_rate": "1.2587e-04",
+ "loss": 0.563,
+ "slid_loss": 0.6346,
+ "step": 3614,
+ "time": 13.78
+ },
+ {
+ "epoch": 3.47,
+ "learning_rate": "1.2584e-04",
+ "loss": 0.6523,
+ "slid_loss": 0.6352,
+ "step": 3615,
+ "time": 11.88
+ },
+ {
+ "epoch": 3.47,
+ "learning_rate": "1.2581e-04",
+ "loss": 0.6654,
+ "slid_loss": 0.635,
+ "step": 3616,
+ "time": 13.63
+ },
+ {
+ "epoch": 3.47,
+ "learning_rate": "1.2578e-04",
+ "loss": 0.6297,
+ "slid_loss": 0.6355,
+ "step": 3617,
+ "time": 12.13
+ },
+ {
+ "epoch": 3.48,
+ "learning_rate": "1.2575e-04",
+ "loss": 0.5472,
+ "slid_loss": 0.6342,
+ "step": 3618,
+ "time": 12.92
+ },
+ {
+ "epoch": 3.48,
+ "learning_rate": "1.2572e-04",
+ "loss": 0.5821,
+ "slid_loss": 0.6337,
+ "step": 3619,
+ "time": 11.27
+ },
+ {
+ "epoch": 3.48,
+ "learning_rate": "1.2569e-04",
+ "loss": 0.6775,
+ "slid_loss": 0.6346,
+ "step": 3620,
+ "time": 13.49
+ },
+ {
+ "epoch": 3.48,
+ "learning_rate": "1.2566e-04",
+ "loss": 0.5538,
+ "slid_loss": 0.6336,
+ "step": 3621,
+ "time": 13.15
+ },
+ {
+ "epoch": 3.48,
+ "learning_rate": "1.2563e-04",
+ "loss": 0.7229,
+ "slid_loss": 0.6343,
+ "step": 3622,
+ "time": 13.33
+ },
+ {
+ "epoch": 3.48,
+ "learning_rate": "1.2560e-04",
+ "loss": 0.7131,
+ "slid_loss": 0.6354,
+ "step": 3623,
+ "time": 13.49
+ },
+ {
+ "epoch": 3.48,
+ "learning_rate": "1.2557e-04",
+ "loss": 0.6274,
+ "slid_loss": 0.6346,
+ "step": 3624,
+ "time": 13.4
+ },
+ {
+ "epoch": 3.48,
+ "learning_rate": "1.2554e-04",
+ "loss": 0.6581,
+ "slid_loss": 0.6346,
+ "step": 3625,
+ "time": 13.51
+ },
+ {
+ "epoch": 3.48,
+ "learning_rate": "1.2551e-04",
+ "loss": 0.6739,
+ "slid_loss": 0.6338,
+ "step": 3626,
+ "time": 13.99
+ },
+ {
+ "epoch": 3.48,
+ "learning_rate": "1.2549e-04",
+ "loss": 0.6736,
+ "slid_loss": 0.6347,
+ "step": 3627,
+ "time": 12.33
+ },
+ {
+ "epoch": 3.49,
+ "learning_rate": "1.2546e-04",
+ "loss": 0.6485,
+ "slid_loss": 0.6353,
+ "step": 3628,
+ "time": 14.18
+ },
+ {
+ "epoch": 3.49,
+ "learning_rate": "1.2543e-04",
+ "loss": 0.6916,
+ "slid_loss": 0.6354,
+ "step": 3629,
+ "time": 12.03
+ },
+ {
+ "epoch": 3.49,
+ "learning_rate": "1.2540e-04",
+ "loss": 0.623,
+ "slid_loss": 0.6355,
+ "step": 3630,
+ "time": 13.25
+ },
+ {
+ "epoch": 3.49,
+ "learning_rate": "1.2537e-04",
+ "loss": 0.6181,
+ "slid_loss": 0.6348,
+ "step": 3631,
+ "time": 13.28
+ },
+ {
+ "epoch": 3.49,
+ "learning_rate": "1.2534e-04",
+ "loss": 0.7341,
+ "slid_loss": 0.6353,
+ "step": 3632,
+ "time": 13.3
+ },
+ {
+ "epoch": 3.49,
+ "learning_rate": "1.2531e-04",
+ "loss": 0.6094,
+ "slid_loss": 0.6347,
+ "step": 3633,
+ "time": 13.23
+ },
+ {
+ "epoch": 3.49,
+ "learning_rate": "1.2528e-04",
+ "loss": 0.6378,
+ "slid_loss": 0.6343,
+ "step": 3634,
+ "time": 13.37
+ },
+ {
+ "epoch": 3.49,
+ "learning_rate": "1.2525e-04",
+ "loss": 0.5685,
+ "slid_loss": 0.6335,
+ "step": 3635,
+ "time": 10.91
+ },
+ {
+ "epoch": 3.49,
+ "learning_rate": "1.2522e-04",
+ "loss": 0.7015,
+ "slid_loss": 0.6339,
+ "step": 3636,
+ "time": 11.69
+ },
+ {
+ "epoch": 3.49,
+ "learning_rate": "1.2519e-04",
+ "loss": 0.5585,
+ "slid_loss": 0.6331,
+ "step": 3637,
+ "time": 12.72
+ },
+ {
+ "epoch": 3.49,
+ "learning_rate": "1.2516e-04",
+ "loss": 0.6683,
+ "slid_loss": 0.6334,
+ "step": 3638,
+ "time": 11.48
+ },
+ {
+ "epoch": 3.5,
+ "learning_rate": "1.2514e-04",
+ "loss": 0.6163,
+ "slid_loss": 0.6342,
+ "step": 3639,
+ "time": 13.91
+ },
+ {
+ "epoch": 3.5,
+ "learning_rate": "1.2511e-04",
+ "loss": 0.6022,
+ "slid_loss": 0.6335,
+ "step": 3640,
+ "time": 12.93
+ },
+ {
+ "epoch": 3.5,
+ "learning_rate": "1.2508e-04",
+ "loss": 0.6531,
+ "slid_loss": 0.6344,
+ "step": 3641,
+ "time": 13.4
+ },
+ {
+ "epoch": 3.5,
+ "learning_rate": "1.2505e-04",
+ "loss": 0.5738,
+ "slid_loss": 0.6339,
+ "step": 3642,
+ "time": 13.89
+ },
+ {
+ "epoch": 3.5,
+ "learning_rate": "1.2502e-04",
+ "loss": 0.6652,
+ "slid_loss": 0.6341,
+ "step": 3643,
+ "time": 11.3
+ },
+ {
+ "epoch": 3.5,
+ "learning_rate": "1.2499e-04",
+ "loss": 0.6867,
+ "slid_loss": 0.634,
+ "step": 3644,
+ "time": 13.78
+ },
+ {
+ "epoch": 3.5,
+ "learning_rate": "1.2496e-04",
+ "loss": 0.6619,
+ "slid_loss": 0.6351,
+ "step": 3645,
+ "time": 13.24
+ },
+ {
+ "epoch": 3.5,
+ "learning_rate": "1.2493e-04",
+ "loss": 0.8242,
+ "slid_loss": 0.6377,
+ "step": 3646,
+ "time": 14.06
+ },
+ {
+ "epoch": 3.5,
+ "learning_rate": "1.2490e-04",
+ "loss": 0.5785,
+ "slid_loss": 0.6372,
+ "step": 3647,
+ "time": 13.32
+ },
+ {
+ "epoch": 3.5,
+ "learning_rate": "1.2487e-04",
+ "loss": 0.6673,
+ "slid_loss": 0.6373,
+ "step": 3648,
+ "time": 13.52
+ },
+ {
+ "epoch": 3.51,
+ "learning_rate": "1.2485e-04",
+ "loss": 0.7464,
+ "slid_loss": 0.6391,
+ "step": 3649,
+ "time": 12.13
+ },
+ {
+ "epoch": 3.51,
+ "learning_rate": "1.2482e-04",
+ "loss": 0.6248,
+ "slid_loss": 0.639,
+ "step": 3650,
+ "time": 13.26
+ },
+ {
+ "epoch": 3.51,
+ "learning_rate": "1.2479e-04",
+ "loss": 0.6646,
+ "slid_loss": 0.6393,
+ "step": 3651,
+ "time": 13.39
+ },
+ {
+ "epoch": 3.51,
+ "learning_rate": "1.2476e-04",
+ "loss": 0.6298,
+ "slid_loss": 0.6391,
+ "step": 3652,
+ "time": 13.26
+ },
+ {
+ "epoch": 3.51,
+ "learning_rate": "1.2473e-04",
+ "loss": 0.5877,
+ "slid_loss": 0.639,
+ "step": 3653,
+ "time": 13.19
+ },
+ {
+ "epoch": 3.51,
+ "learning_rate": "1.2470e-04",
+ "loss": 0.6441,
+ "slid_loss": 0.6394,
+ "step": 3654,
+ "time": 13.41
+ },
+ {
+ "epoch": 3.51,
+ "learning_rate": "1.2467e-04",
+ "loss": 0.7023,
+ "slid_loss": 0.6397,
+ "step": 3655,
+ "time": 12.45
+ },
+ {
+ "epoch": 3.51,
+ "learning_rate": "1.2464e-04",
+ "loss": 0.646,
+ "slid_loss": 0.6407,
+ "step": 3656,
+ "time": 13.68
+ },
+ {
+ "epoch": 3.51,
+ "learning_rate": "1.2461e-04",
+ "loss": 0.6667,
+ "slid_loss": 0.6414,
+ "step": 3657,
+ "time": 13.72
+ },
+ {
+ "epoch": 3.51,
+ "learning_rate": "1.2458e-04",
+ "loss": 0.5873,
+ "slid_loss": 0.6419,
+ "step": 3658,
+ "time": 13.17
+ },
+ {
+ "epoch": 3.51,
+ "learning_rate": "1.2456e-04",
+ "loss": 0.6373,
+ "slid_loss": 0.6414,
+ "step": 3659,
+ "time": 13.43
+ },
+ {
+ "epoch": 3.52,
+ "learning_rate": "1.2453e-04",
+ "loss": 0.5847,
+ "slid_loss": 0.6411,
+ "step": 3660,
+ "time": 11.32
+ },
+ {
+ "epoch": 3.52,
+ "learning_rate": "1.2450e-04",
+ "loss": 0.602,
+ "slid_loss": 0.6408,
+ "step": 3661,
+ "time": 12.84
+ },
+ {
+ "epoch": 3.52,
+ "learning_rate": "1.2447e-04",
+ "loss": 0.6551,
+ "slid_loss": 0.64,
+ "step": 3662,
+ "time": 13.73
+ },
+ {
+ "epoch": 3.52,
+ "learning_rate": "1.2444e-04",
+ "loss": 0.6408,
+ "slid_loss": 0.6403,
+ "step": 3663,
+ "time": 13.31
+ },
+ {
+ "epoch": 3.52,
+ "learning_rate": "1.2441e-04",
+ "loss": 0.61,
+ "slid_loss": 0.6404,
+ "step": 3664,
+ "time": 11.63
+ },
+ {
+ "epoch": 3.52,
+ "learning_rate": "1.2438e-04",
+ "loss": 0.5255,
+ "slid_loss": 0.6394,
+ "step": 3665,
+ "time": 13.72
+ },
+ {
+ "epoch": 3.52,
+ "learning_rate": "1.2435e-04",
+ "loss": 0.6167,
+ "slid_loss": 0.6382,
+ "step": 3666,
+ "time": 13.21
+ },
+ {
+ "epoch": 3.52,
+ "learning_rate": "1.2433e-04",
+ "loss": 0.5792,
+ "slid_loss": 0.6382,
+ "step": 3667,
+ "time": 11.14
+ },
+ {
+ "epoch": 3.52,
+ "learning_rate": "1.2430e-04",
+ "loss": 0.546,
+ "slid_loss": 0.6369,
+ "step": 3668,
+ "time": 12.9
+ },
+ {
+ "epoch": 3.52,
+ "learning_rate": "1.2427e-04",
+ "loss": 0.6492,
+ "slid_loss": 0.6363,
+ "step": 3669,
+ "time": 13.45
+ },
+ {
+ "epoch": 3.53,
+ "learning_rate": "1.2424e-04",
+ "loss": 0.6068,
+ "slid_loss": 0.6369,
+ "step": 3670,
+ "time": 12.97
+ },
+ {
+ "epoch": 3.53,
+ "learning_rate": "1.2421e-04",
+ "loss": 0.6387,
+ "slid_loss": 0.6362,
+ "step": 3671,
+ "time": 11.7
+ },
+ {
+ "epoch": 3.53,
+ "learning_rate": "1.2418e-04",
+ "loss": 0.6768,
+ "slid_loss": 0.637,
+ "step": 3672,
+ "time": 12.7
+ },
+ {
+ "epoch": 3.53,
+ "learning_rate": "1.2415e-04",
+ "loss": 0.55,
+ "slid_loss": 0.6366,
+ "step": 3673,
+ "time": 13.28
+ },
+ {
+ "epoch": 3.53,
+ "learning_rate": "1.2412e-04",
+ "loss": 0.5133,
+ "slid_loss": 0.6352,
+ "step": 3674,
+ "time": 13.28
+ },
+ {
+ "epoch": 3.53,
+ "learning_rate": "1.2410e-04",
+ "loss": 0.6089,
+ "slid_loss": 0.6349,
+ "step": 3675,
+ "time": 12.82
+ },
+ {
+ "epoch": 3.53,
+ "learning_rate": "1.2407e-04",
+ "loss": 0.6283,
+ "slid_loss": 0.6351,
+ "step": 3676,
+ "time": 13.16
+ },
+ {
+ "epoch": 3.53,
+ "learning_rate": "1.2404e-04",
+ "loss": 0.5439,
+ "slid_loss": 0.6339,
+ "step": 3677,
+ "time": 14.07
+ },
+ {
+ "epoch": 3.53,
+ "learning_rate": "1.2401e-04",
+ "loss": 0.6336,
+ "slid_loss": 0.6344,
+ "step": 3678,
+ "time": 13.71
+ },
+ {
+ "epoch": 3.53,
+ "learning_rate": "1.2398e-04",
+ "loss": 0.5126,
+ "slid_loss": 0.6336,
+ "step": 3679,
+ "time": 13.02
+ },
+ {
+ "epoch": 3.54,
+ "learning_rate": "1.2395e-04",
+ "loss": 0.6213,
+ "slid_loss": 0.6326,
+ "step": 3680,
+ "time": 13.56
+ },
+ {
+ "epoch": 3.54,
+ "learning_rate": "1.2392e-04",
+ "loss": 0.6166,
+ "slid_loss": 0.6328,
+ "step": 3681,
+ "time": 12.93
+ },
+ {
+ "epoch": 3.54,
+ "learning_rate": "1.2390e-04",
+ "loss": 0.5478,
+ "slid_loss": 0.6318,
+ "step": 3682,
+ "time": 12.26
+ },
+ {
+ "epoch": 3.54,
+ "learning_rate": "1.2387e-04",
+ "loss": 0.6438,
+ "slid_loss": 0.6322,
+ "step": 3683,
+ "time": 13.77
+ },
+ {
+ "epoch": 3.54,
+ "learning_rate": "1.2384e-04",
+ "loss": 0.6406,
+ "slid_loss": 0.6318,
+ "step": 3684,
+ "time": 11.6
+ },
+ {
+ "epoch": 3.54,
+ "learning_rate": "1.2381e-04",
+ "loss": 0.7252,
+ "slid_loss": 0.6327,
+ "step": 3685,
+ "time": 14.03
+ },
+ {
+ "epoch": 3.54,
+ "learning_rate": "1.2378e-04",
+ "loss": 0.5925,
+ "slid_loss": 0.6316,
+ "step": 3686,
+ "time": 12.37
+ },
+ {
+ "epoch": 3.54,
+ "learning_rate": "1.2375e-04",
+ "loss": 0.6093,
+ "slid_loss": 0.6318,
+ "step": 3687,
+ "time": 13.7
+ },
+ {
+ "epoch": 3.54,
+ "learning_rate": "1.2372e-04",
+ "loss": 0.5485,
+ "slid_loss": 0.6308,
+ "step": 3688,
+ "time": 13.39
+ },
+ {
+ "epoch": 3.54,
+ "learning_rate": "1.2370e-04",
+ "loss": 0.5689,
+ "slid_loss": 0.631,
+ "step": 3689,
+ "time": 12.91
+ },
+ {
+ "epoch": 3.54,
+ "learning_rate": "1.2367e-04",
+ "loss": 0.5983,
+ "slid_loss": 0.6309,
+ "step": 3690,
+ "time": 13.04
+ },
+ {
+ "epoch": 3.55,
+ "learning_rate": "1.2364e-04",
+ "loss": 0.7227,
+ "slid_loss": 0.6319,
+ "step": 3691,
+ "time": 13.81
+ },
+ {
+ "epoch": 3.55,
+ "learning_rate": "1.2361e-04",
+ "loss": 0.6128,
+ "slid_loss": 0.6321,
+ "step": 3692,
+ "time": 11.35
+ },
+ {
+ "epoch": 3.55,
+ "learning_rate": "1.2358e-04",
+ "loss": 0.5602,
+ "slid_loss": 0.6315,
+ "step": 3693,
+ "time": 11.51
+ },
+ {
+ "epoch": 3.55,
+ "learning_rate": "1.2355e-04",
+ "loss": 0.6116,
+ "slid_loss": 0.6308,
+ "step": 3694,
+ "time": 11.93
+ },
+ {
+ "epoch": 3.55,
+ "learning_rate": "1.2352e-04",
+ "loss": 0.572,
+ "slid_loss": 0.6306,
+ "step": 3695,
+ "time": 12.26
+ },
+ {
+ "epoch": 3.55,
+ "learning_rate": "1.2350e-04",
+ "loss": 0.6107,
+ "slid_loss": 0.6306,
+ "step": 3696,
+ "time": 12.31
+ },
+ {
+ "epoch": 3.55,
+ "learning_rate": "1.2347e-04",
+ "loss": 0.5969,
+ "slid_loss": 0.6309,
+ "step": 3697,
+ "time": 14.59
+ },
+ {
+ "epoch": 3.55,
+ "learning_rate": "1.2344e-04",
+ "loss": 0.6376,
+ "slid_loss": 0.631,
+ "step": 3698,
+ "time": 14.58
+ },
+ {
+ "epoch": 3.55,
+ "learning_rate": "1.2341e-04",
+ "loss": 0.8,
+ "slid_loss": 0.6322,
+ "step": 3699,
+ "time": 13.64
+ },
+ {
+ "epoch": 3.55,
+ "learning_rate": "1.2338e-04",
+ "loss": 0.6438,
+ "slid_loss": 0.6318,
+ "step": 3700,
+ "time": 13.95
+ },
+ {
+ "epoch": 3.56,
+ "learning_rate": "1.2335e-04",
+ "loss": 0.5858,
+ "slid_loss": 0.6315,
+ "step": 3701,
+ "time": 13.24
+ },
+ {
+ "epoch": 3.56,
+ "learning_rate": "1.2333e-04",
+ "loss": 0.6567,
+ "slid_loss": 0.6323,
+ "step": 3702,
+ "time": 11.28
+ },
+ {
+ "epoch": 3.56,
+ "learning_rate": "1.2330e-04",
+ "loss": 0.5824,
+ "slid_loss": 0.6324,
+ "step": 3703,
+ "time": 14.04
+ },
+ {
+ "epoch": 3.56,
+ "learning_rate": "1.2327e-04",
+ "loss": 0.6941,
+ "slid_loss": 0.6324,
+ "step": 3704,
+ "time": 12.66
+ },
+ {
+ "epoch": 3.56,
+ "learning_rate": "1.2324e-04",
+ "loss": 0.6124,
+ "slid_loss": 0.6315,
+ "step": 3705,
+ "time": 13.41
+ },
+ {
+ "epoch": 3.56,
+ "learning_rate": "1.2321e-04",
+ "loss": 0.6248,
+ "slid_loss": 0.6311,
+ "step": 3706,
+ "time": 13.33
+ },
+ {
+ "epoch": 3.56,
+ "learning_rate": "1.2318e-04",
+ "loss": 0.5926,
+ "slid_loss": 0.6303,
+ "step": 3707,
+ "time": 13.66
+ },
+ {
+ "epoch": 3.56,
+ "learning_rate": "1.2316e-04",
+ "loss": 0.6705,
+ "slid_loss": 0.6302,
+ "step": 3708,
+ "time": 12.8
+ },
+ {
+ "epoch": 3.56,
+ "learning_rate": "1.2313e-04",
+ "loss": 0.6563,
+ "slid_loss": 0.63,
+ "step": 3709,
+ "time": 12.94
+ },
+ {
+ "epoch": 3.56,
+ "learning_rate": "1.2310e-04",
+ "loss": 0.7114,
+ "slid_loss": 0.6308,
+ "step": 3710,
+ "time": 13.95
+ },
+ {
+ "epoch": 3.56,
+ "learning_rate": "1.2307e-04",
+ "loss": 0.6356,
+ "slid_loss": 0.6301,
+ "step": 3711,
+ "time": 13.8
+ },
+ {
+ "epoch": 3.57,
+ "learning_rate": "1.2304e-04",
+ "loss": 0.642,
+ "slid_loss": 0.6303,
+ "step": 3712,
+ "time": 14.56
+ },
+ {
+ "epoch": 3.57,
+ "learning_rate": "1.2301e-04",
+ "loss": 0.6729,
+ "slid_loss": 0.6301,
+ "step": 3713,
+ "time": 13.29
+ },
+ {
+ "epoch": 3.57,
+ "learning_rate": "1.2299e-04",
+ "loss": 0.6034,
+ "slid_loss": 0.6305,
+ "step": 3714,
+ "time": 13.58
+ },
+ {
+ "epoch": 3.57,
+ "learning_rate": "1.2296e-04",
+ "loss": 0.667,
+ "slid_loss": 0.6306,
+ "step": 3715,
+ "time": 13.85
+ },
+ {
+ "epoch": 3.57,
+ "learning_rate": "1.2293e-04",
+ "loss": 0.6051,
+ "slid_loss": 0.63,
+ "step": 3716,
+ "time": 12.23
+ },
+ {
+ "epoch": 3.57,
+ "learning_rate": "1.2290e-04",
+ "loss": 0.6163,
+ "slid_loss": 0.6299,
+ "step": 3717,
+ "time": 13.67
+ },
+ {
+ "epoch": 3.57,
+ "learning_rate": "1.2287e-04",
+ "loss": 0.6735,
+ "slid_loss": 0.6311,
+ "step": 3718,
+ "time": 12.77
+ },
+ {
+ "epoch": 3.57,
+ "learning_rate": "1.2284e-04",
+ "loss": 0.5517,
+ "slid_loss": 0.6308,
+ "step": 3719,
+ "time": 11.82
+ },
+ {
+ "epoch": 3.57,
+ "learning_rate": "1.2282e-04",
+ "loss": 0.6306,
+ "slid_loss": 0.6304,
+ "step": 3720,
+ "time": 12.95
+ },
+ {
+ "epoch": 3.57,
+ "learning_rate": "1.2279e-04",
+ "loss": 0.6732,
+ "slid_loss": 0.6316,
+ "step": 3721,
+ "time": 12.92
+ },
+ {
+ "epoch": 3.58,
+ "learning_rate": "1.2276e-04",
+ "loss": 0.5174,
+ "slid_loss": 0.6295,
+ "step": 3722,
+ "time": 12.29
+ },
+ {
+ "epoch": 3.58,
+ "learning_rate": "1.2273e-04",
+ "loss": 0.6443,
+ "slid_loss": 0.6288,
+ "step": 3723,
+ "time": 12.19
+ },
+ {
+ "epoch": 3.58,
+ "learning_rate": "1.2270e-04",
+ "loss": 0.6409,
+ "slid_loss": 0.629,
+ "step": 3724,
+ "time": 11.24
+ },
+ {
+ "epoch": 3.58,
+ "learning_rate": "1.2268e-04",
+ "loss": 0.656,
+ "slid_loss": 0.6289,
+ "step": 3725,
+ "time": 13.58
+ },
+ {
+ "epoch": 3.58,
+ "learning_rate": "1.2265e-04",
+ "loss": 0.5749,
+ "slid_loss": 0.6279,
+ "step": 3726,
+ "time": 12.98
+ },
+ {
+ "epoch": 3.58,
+ "learning_rate": "1.2262e-04",
+ "loss": 0.5907,
+ "slid_loss": 0.6271,
+ "step": 3727,
+ "time": 13.88
+ },
+ {
+ "epoch": 3.58,
+ "learning_rate": "1.2259e-04",
+ "loss": 0.6519,
+ "slid_loss": 0.6271,
+ "step": 3728,
+ "time": 13.26
+ },
+ {
+ "epoch": 3.58,
+ "learning_rate": "1.2256e-04",
+ "loss": 0.5387,
+ "slid_loss": 0.6256,
+ "step": 3729,
+ "time": 13.41
+ },
+ {
+ "epoch": 3.58,
+ "learning_rate": "1.2254e-04",
+ "loss": 0.566,
+ "slid_loss": 0.625,
+ "step": 3730,
+ "time": 11.62
+ },
+ {
+ "epoch": 3.58,
+ "learning_rate": "1.2251e-04",
+ "loss": 0.5706,
+ "slid_loss": 0.6246,
+ "step": 3731,
+ "time": 13.03
+ },
+ {
+ "epoch": 3.59,
+ "learning_rate": "1.2248e-04",
+ "loss": 0.612,
+ "slid_loss": 0.6233,
+ "step": 3732,
+ "time": 13.36
+ },
+ {
+ "epoch": 3.59,
+ "learning_rate": "1.2245e-04",
+ "loss": 0.6066,
+ "slid_loss": 0.6233,
+ "step": 3733,
+ "time": 15.02
+ },
+ {
+ "epoch": 3.59,
+ "learning_rate": "1.2242e-04",
+ "loss": 0.6042,
+ "slid_loss": 0.623,
+ "step": 3734,
+ "time": 13.84
+ },
+ {
+ "epoch": 3.59,
+ "learning_rate": "1.2240e-04",
+ "loss": 0.604,
+ "slid_loss": 0.6233,
+ "step": 3735,
+ "time": 11.83
+ },
+ {
+ "epoch": 3.59,
+ "learning_rate": "1.2237e-04",
+ "loss": 0.6495,
+ "slid_loss": 0.6228,
+ "step": 3736,
+ "time": 14.76
+ },
+ {
+ "epoch": 3.59,
+ "learning_rate": "1.2234e-04",
+ "loss": 0.6151,
+ "slid_loss": 0.6234,
+ "step": 3737,
+ "time": 11.74
+ },
+ {
+ "epoch": 3.59,
+ "learning_rate": "1.2231e-04",
+ "loss": 0.6755,
+ "slid_loss": 0.6235,
+ "step": 3738,
+ "time": 13.36
+ },
+ {
+ "epoch": 3.59,
+ "learning_rate": "1.2228e-04",
+ "loss": 0.5494,
+ "slid_loss": 0.6228,
+ "step": 3739,
+ "time": 12.73
+ },
+ {
+ "epoch": 3.59,
+ "learning_rate": "1.2226e-04",
+ "loss": 0.6258,
+ "slid_loss": 0.623,
+ "step": 3740,
+ "time": 10.9
+ },
+ {
+ "epoch": 3.59,
+ "learning_rate": "1.2223e-04",
+ "loss": 0.6919,
+ "slid_loss": 0.6234,
+ "step": 3741,
+ "time": 13.11
+ },
+ {
+ "epoch": 3.59,
+ "learning_rate": "1.2220e-04",
+ "loss": 0.5469,
+ "slid_loss": 0.6231,
+ "step": 3742,
+ "time": 13.61
+ },
+ {
+ "epoch": 3.6,
+ "learning_rate": "1.2217e-04",
+ "loss": 0.6807,
+ "slid_loss": 0.6233,
+ "step": 3743,
+ "time": 13.5
+ },
+ {
+ "epoch": 3.6,
+ "learning_rate": "1.2214e-04",
+ "loss": 0.5979,
+ "slid_loss": 0.6224,
+ "step": 3744,
+ "time": 12.86
+ },
+ {
+ "epoch": 3.6,
+ "learning_rate": "1.2212e-04",
+ "loss": 0.609,
+ "slid_loss": 0.6219,
+ "step": 3745,
+ "time": 14.03
+ },
+ {
+ "epoch": 3.6,
+ "learning_rate": "1.2209e-04",
+ "loss": 0.6616,
+ "slid_loss": 0.6203,
+ "step": 3746,
+ "time": 12.91
+ },
+ {
+ "epoch": 3.6,
+ "learning_rate": "1.2206e-04",
+ "loss": 0.6405,
+ "slid_loss": 0.6209,
+ "step": 3747,
+ "time": 13.74
+ },
+ {
+ "epoch": 3.6,
+ "learning_rate": "1.2203e-04",
+ "loss": 0.6997,
+ "slid_loss": 0.6212,
+ "step": 3748,
+ "time": 13.54
+ },
+ {
+ "epoch": 3.6,
+ "learning_rate": "1.2201e-04",
+ "loss": 0.5952,
+ "slid_loss": 0.6197,
+ "step": 3749,
+ "time": 13.88
+ },
+ {
+ "epoch": 3.6,
+ "learning_rate": "1.2198e-04",
+ "loss": 0.6096,
+ "slid_loss": 0.6195,
+ "step": 3750,
+ "time": 12.93
+ },
+ {
+ "epoch": 3.6,
+ "learning_rate": "1.2195e-04",
+ "loss": 0.6262,
+ "slid_loss": 0.6192,
+ "step": 3751,
+ "time": 12.07
+ },
+ {
+ "epoch": 3.6,
+ "learning_rate": "1.2192e-04",
+ "loss": 0.7387,
+ "slid_loss": 0.6202,
+ "step": 3752,
+ "time": 12.9
+ },
+ {
+ "epoch": 3.61,
+ "learning_rate": "1.2189e-04",
+ "loss": 0.6803,
+ "slid_loss": 0.6212,
+ "step": 3753,
+ "time": 11.68
+ },
+ {
+ "epoch": 3.61,
+ "learning_rate": "1.2187e-04",
+ "loss": 0.585,
+ "slid_loss": 0.6206,
+ "step": 3754,
+ "time": 11.82
+ },
+ {
+ "epoch": 3.61,
+ "learning_rate": "1.2184e-04",
+ "loss": 0.7339,
+ "slid_loss": 0.6209,
+ "step": 3755,
+ "time": 12.84
+ },
+ {
+ "epoch": 3.61,
+ "learning_rate": "1.2181e-04",
+ "loss": 0.5688,
+ "slid_loss": 0.6201,
+ "step": 3756,
+ "time": 11.95
+ },
+ {
+ "epoch": 3.61,
+ "learning_rate": "1.2178e-04",
+ "loss": 0.664,
+ "slid_loss": 0.6201,
+ "step": 3757,
+ "time": 13.49
+ },
+ {
+ "epoch": 3.61,
+ "learning_rate": "1.2176e-04",
+ "loss": 0.5742,
+ "slid_loss": 0.62,
+ "step": 3758,
+ "time": 14.4
+ },
+ {
+ "epoch": 3.61,
+ "learning_rate": "1.2173e-04",
+ "loss": 0.6069,
+ "slid_loss": 0.6197,
+ "step": 3759,
+ "time": 13.93
+ },
+ {
+ "epoch": 3.61,
+ "learning_rate": "1.2170e-04",
+ "loss": 0.6834,
+ "slid_loss": 0.6206,
+ "step": 3760,
+ "time": 13.6
+ },
+ {
+ "epoch": 3.61,
+ "learning_rate": "1.2167e-04",
+ "loss": 0.6422,
+ "slid_loss": 0.621,
+ "step": 3761,
+ "time": 11.37
+ },
+ {
+ "epoch": 3.61,
+ "learning_rate": "1.2165e-04",
+ "loss": 0.5186,
+ "slid_loss": 0.6197,
+ "step": 3762,
+ "time": 14.09
+ },
+ {
+ "epoch": 3.61,
+ "learning_rate": "1.2162e-04",
+ "loss": 0.6129,
+ "slid_loss": 0.6194,
+ "step": 3763,
+ "time": 10.97
+ },
+ {
+ "epoch": 3.62,
+ "learning_rate": "1.2159e-04",
+ "loss": 0.6105,
+ "slid_loss": 0.6194,
+ "step": 3764,
+ "time": 10.95
+ },
+ {
+ "epoch": 3.62,
+ "learning_rate": "1.2156e-04",
+ "loss": 0.5842,
+ "slid_loss": 0.62,
+ "step": 3765,
+ "time": 14.08
+ },
+ {
+ "epoch": 3.62,
+ "learning_rate": "1.2154e-04",
+ "loss": 0.5901,
+ "slid_loss": 0.6197,
+ "step": 3766,
+ "time": 13.8
+ },
+ {
+ "epoch": 3.62,
+ "learning_rate": "1.2151e-04",
+ "loss": 0.6424,
+ "slid_loss": 0.6204,
+ "step": 3767,
+ "time": 12.87
+ },
+ {
+ "epoch": 3.62,
+ "learning_rate": "1.2148e-04",
+ "loss": 0.6245,
+ "slid_loss": 0.6211,
+ "step": 3768,
+ "time": 12.9
+ },
+ {
+ "epoch": 3.62,
+ "learning_rate": "1.2145e-04",
+ "loss": 0.5785,
+ "slid_loss": 0.6204,
+ "step": 3769,
+ "time": 13.29
+ },
+ {
+ "epoch": 3.62,
+ "learning_rate": "1.2143e-04",
+ "loss": 0.5182,
+ "slid_loss": 0.6196,
+ "step": 3770,
+ "time": 12.97
+ },
+ {
+ "epoch": 3.62,
+ "learning_rate": "1.2140e-04",
+ "loss": 0.5652,
+ "slid_loss": 0.6188,
+ "step": 3771,
+ "time": 13.54
+ },
+ {
+ "epoch": 3.62,
+ "learning_rate": "1.2137e-04",
+ "loss": 0.5517,
+ "slid_loss": 0.6176,
+ "step": 3772,
+ "time": 13.37
+ },
+ {
+ "epoch": 3.62,
+ "learning_rate": "1.2134e-04",
+ "loss": 0.5413,
+ "slid_loss": 0.6175,
+ "step": 3773,
+ "time": 13.5
+ },
+ {
+ "epoch": 3.63,
+ "learning_rate": "1.2132e-04",
+ "loss": 0.6788,
+ "slid_loss": 0.6191,
+ "step": 3774,
+ "time": 11.17
+ },
+ {
+ "epoch": 3.63,
+ "learning_rate": "1.2129e-04",
+ "loss": 0.6935,
+ "slid_loss": 0.62,
+ "step": 3775,
+ "time": 13.88
+ },
+ {
+ "epoch": 3.63,
+ "learning_rate": "1.2126e-04",
+ "loss": 0.5992,
+ "slid_loss": 0.6197,
+ "step": 3776,
+ "time": 13.57
+ },
+ {
+ "epoch": 3.63,
+ "learning_rate": "1.2123e-04",
+ "loss": 0.645,
+ "slid_loss": 0.6207,
+ "step": 3777,
+ "time": 14.08
+ },
+ {
+ "epoch": 3.63,
+ "learning_rate": "1.2121e-04",
+ "loss": 0.6072,
+ "slid_loss": 0.6204,
+ "step": 3778,
+ "time": 13.7
+ },
+ {
+ "epoch": 3.63,
+ "learning_rate": "1.2118e-04",
+ "loss": 0.6472,
+ "slid_loss": 0.6218,
+ "step": 3779,
+ "time": 13.99
+ },
+ {
+ "epoch": 3.63,
+ "learning_rate": "1.2115e-04",
+ "loss": 0.5,
+ "slid_loss": 0.6206,
+ "step": 3780,
+ "time": 13.28
+ },
+ {
+ "epoch": 3.63,
+ "learning_rate": "1.2112e-04",
+ "loss": 0.5584,
+ "slid_loss": 0.62,
+ "step": 3781,
+ "time": 13.98
+ },
+ {
+ "epoch": 3.63,
+ "learning_rate": "1.2110e-04",
+ "loss": 0.6658,
+ "slid_loss": 0.6212,
+ "step": 3782,
+ "time": 13.31
+ },
+ {
+ "epoch": 3.63,
+ "learning_rate": "1.2107e-04",
+ "loss": 0.6418,
+ "slid_loss": 0.6212,
+ "step": 3783,
+ "time": 12.93
+ },
+ {
+ "epoch": 3.63,
+ "learning_rate": "1.2104e-04",
+ "loss": 0.675,
+ "slid_loss": 0.6215,
+ "step": 3784,
+ "time": 11.53
+ },
+ {
+ "epoch": 3.64,
+ "learning_rate": "1.2101e-04",
+ "loss": 0.6514,
+ "slid_loss": 0.6208,
+ "step": 3785,
+ "time": 13.82
+ },
+ {
+ "epoch": 3.64,
+ "learning_rate": "1.2099e-04",
+ "loss": 0.6823,
+ "slid_loss": 0.6217,
+ "step": 3786,
+ "time": 12.39
+ },
+ {
+ "epoch": 3.64,
+ "learning_rate": "1.2096e-04",
+ "loss": 0.7275,
+ "slid_loss": 0.6228,
+ "step": 3787,
+ "time": 11.63
+ },
+ {
+ "epoch": 3.64,
+ "learning_rate": "1.2093e-04",
+ "loss": 0.6916,
+ "slid_loss": 0.6243,
+ "step": 3788,
+ "time": 12.85
+ },
+ {
+ "epoch": 3.64,
+ "learning_rate": "1.2090e-04",
+ "loss": 0.6152,
+ "slid_loss": 0.6247,
+ "step": 3789,
+ "time": 13.88
+ },
+ {
+ "epoch": 3.64,
+ "learning_rate": "1.2088e-04",
+ "loss": 0.4926,
+ "slid_loss": 0.6237,
+ "step": 3790,
+ "time": 12.67
+ },
+ {
+ "epoch": 3.64,
+ "learning_rate": "1.2085e-04",
+ "loss": 0.6383,
+ "slid_loss": 0.6228,
+ "step": 3791,
+ "time": 13.42
+ },
+ {
+ "epoch": 3.64,
+ "learning_rate": "1.2082e-04",
+ "loss": 0.632,
+ "slid_loss": 0.623,
+ "step": 3792,
+ "time": 11.9
+ },
+ {
+ "epoch": 3.64,
+ "learning_rate": "1.2080e-04",
+ "loss": 0.6198,
+ "slid_loss": 0.6236,
+ "step": 3793,
+ "time": 13.03
+ },
+ {
+ "epoch": 3.64,
+ "learning_rate": "1.2077e-04",
+ "loss": 0.6196,
+ "slid_loss": 0.6237,
+ "step": 3794,
+ "time": 13.44
+ },
+ {
+ "epoch": 3.65,
+ "learning_rate": "1.2074e-04",
+ "loss": 0.5489,
+ "slid_loss": 0.6235,
+ "step": 3795,
+ "time": 13.49
+ },
+ {
+ "epoch": 3.65,
+ "learning_rate": "1.2071e-04",
+ "loss": 0.7149,
+ "slid_loss": 0.6245,
+ "step": 3796,
+ "time": 14.6
+ },
+ {
+ "epoch": 3.65,
+ "learning_rate": "1.2069e-04",
+ "loss": 0.626,
+ "slid_loss": 0.6248,
+ "step": 3797,
+ "time": 13.34
+ },
+ {
+ "epoch": 3.65,
+ "learning_rate": "1.2066e-04",
+ "loss": 0.6512,
+ "slid_loss": 0.6249,
+ "step": 3798,
+ "time": 13.53
+ },
+ {
+ "epoch": 3.65,
+ "learning_rate": "1.2063e-04",
+ "loss": 0.5921,
+ "slid_loss": 0.6229,
+ "step": 3799,
+ "time": 13.19
+ },
+ {
+ "epoch": 3.65,
+ "learning_rate": "1.2061e-04",
+ "loss": 0.7204,
+ "slid_loss": 0.6236,
+ "step": 3800,
+ "time": 13.33
+ },
+ {
+ "epoch": 3.65,
+ "learning_rate": "1.2058e-04",
+ "loss": 0.6504,
+ "slid_loss": 0.6243,
+ "step": 3801,
+ "time": 12.73
+ },
+ {
+ "epoch": 3.65,
+ "learning_rate": "1.2055e-04",
+ "loss": 0.6032,
+ "slid_loss": 0.6237,
+ "step": 3802,
+ "time": 13.5
+ },
+ {
+ "epoch": 3.65,
+ "learning_rate": "1.2052e-04",
+ "loss": 0.551,
+ "slid_loss": 0.6234,
+ "step": 3803,
+ "time": 13.43
+ },
+ {
+ "epoch": 3.65,
+ "learning_rate": "1.2050e-04",
+ "loss": 0.5887,
+ "slid_loss": 0.6224,
+ "step": 3804,
+ "time": 13.85
+ },
+ {
+ "epoch": 3.66,
+ "learning_rate": "1.2047e-04",
+ "loss": 0.6136,
+ "slid_loss": 0.6224,
+ "step": 3805,
+ "time": 13.32
+ },
+ {
+ "epoch": 3.66,
+ "learning_rate": "1.2044e-04",
+ "loss": 0.7176,
+ "slid_loss": 0.6233,
+ "step": 3806,
+ "time": 12.9
+ },
+ {
+ "epoch": 3.66,
+ "learning_rate": "1.2042e-04",
+ "loss": 0.6658,
+ "slid_loss": 0.624,
+ "step": 3807,
+ "time": 14.51
+ },
+ {
+ "epoch": 3.66,
+ "learning_rate": "1.2039e-04",
+ "loss": 0.5836,
+ "slid_loss": 0.6232,
+ "step": 3808,
+ "time": 12.71
+ },
+ {
+ "epoch": 3.66,
+ "learning_rate": "1.2036e-04",
+ "loss": 0.6107,
+ "slid_loss": 0.6227,
+ "step": 3809,
+ "time": 10.6
+ },
+ {
+ "epoch": 3.66,
+ "learning_rate": "1.2033e-04",
+ "loss": 0.6332,
+ "slid_loss": 0.6219,
+ "step": 3810,
+ "time": 13.2
+ },
+ {
+ "epoch": 3.66,
+ "learning_rate": "1.2031e-04",
+ "loss": 0.5605,
+ "slid_loss": 0.6212,
+ "step": 3811,
+ "time": 12.94
+ },
+ {
+ "epoch": 3.66,
+ "learning_rate": "1.2028e-04",
+ "loss": 0.5722,
+ "slid_loss": 0.6205,
+ "step": 3812,
+ "time": 14.27
+ },
+ {
+ "epoch": 3.66,
+ "learning_rate": "1.2025e-04",
+ "loss": 0.5751,
+ "slid_loss": 0.6195,
+ "step": 3813,
+ "time": 13.57
+ },
+ {
+ "epoch": 3.66,
+ "learning_rate": "1.2023e-04",
+ "loss": 0.5974,
+ "slid_loss": 0.6194,
+ "step": 3814,
+ "time": 13.07
+ },
+ {
+ "epoch": 3.66,
+ "learning_rate": "1.2020e-04",
+ "loss": 0.6686,
+ "slid_loss": 0.6195,
+ "step": 3815,
+ "time": 13.2
+ },
+ {
+ "epoch": 3.67,
+ "learning_rate": "1.2017e-04",
+ "loss": 0.5794,
+ "slid_loss": 0.6192,
+ "step": 3816,
+ "time": 13.39
+ },
+ {
+ "epoch": 3.67,
+ "learning_rate": "1.2015e-04",
+ "loss": 0.6551,
+ "slid_loss": 0.6196,
+ "step": 3817,
+ "time": 13.16
+ },
+ {
+ "epoch": 3.67,
+ "learning_rate": "1.2012e-04",
+ "loss": 0.6355,
+ "slid_loss": 0.6192,
+ "step": 3818,
+ "time": 13.78
+ },
+ {
+ "epoch": 3.67,
+ "learning_rate": "1.2009e-04",
+ "loss": 0.6196,
+ "slid_loss": 0.6199,
+ "step": 3819,
+ "time": 11.31
+ },
+ {
+ "epoch": 3.67,
+ "learning_rate": "1.2007e-04",
+ "loss": 0.5881,
+ "slid_loss": 0.6195,
+ "step": 3820,
+ "time": 12.24
+ },
+ {
+ "epoch": 3.67,
+ "learning_rate": "1.2004e-04",
+ "loss": 0.6033,
+ "slid_loss": 0.6188,
+ "step": 3821,
+ "time": 13.01
+ },
+ {
+ "epoch": 3.67,
+ "learning_rate": "1.2001e-04",
+ "loss": 0.567,
+ "slid_loss": 0.6193,
+ "step": 3822,
+ "time": 13.61
+ },
+ {
+ "epoch": 3.67,
+ "learning_rate": "1.1998e-04",
+ "loss": 0.5563,
+ "slid_loss": 0.6184,
+ "step": 3823,
+ "time": 13.35
+ },
+ {
+ "epoch": 3.67,
+ "learning_rate": "1.1996e-04",
+ "loss": 0.5843,
+ "slid_loss": 0.6178,
+ "step": 3824,
+ "time": 13.41
+ },
+ {
+ "epoch": 3.67,
+ "learning_rate": "1.1993e-04",
+ "loss": 0.654,
+ "slid_loss": 0.6178,
+ "step": 3825,
+ "time": 13.14
+ },
+ {
+ "epoch": 3.68,
+ "learning_rate": "1.1990e-04",
+ "loss": 0.6181,
+ "slid_loss": 0.6182,
+ "step": 3826,
+ "time": 11.42
+ },
+ {
+ "epoch": 3.68,
+ "learning_rate": "1.1988e-04",
+ "loss": 0.6686,
+ "slid_loss": 0.619,
+ "step": 3827,
+ "time": 12.91
+ },
+ {
+ "epoch": 3.68,
+ "learning_rate": "1.1985e-04",
+ "loss": 0.5144,
+ "slid_loss": 0.6176,
+ "step": 3828,
+ "time": 13.81
+ },
+ {
+ "epoch": 3.68,
+ "learning_rate": "1.1982e-04",
+ "loss": 0.5355,
+ "slid_loss": 0.6176,
+ "step": 3829,
+ "time": 14.09
+ },
+ {
+ "epoch": 3.68,
+ "learning_rate": "1.1980e-04",
+ "loss": 0.5977,
+ "slid_loss": 0.6179,
+ "step": 3830,
+ "time": 13.34
+ },
+ {
+ "epoch": 3.68,
+ "learning_rate": "1.1977e-04",
+ "loss": 0.6566,
+ "slid_loss": 0.6188,
+ "step": 3831,
+ "time": 13.28
+ },
+ {
+ "epoch": 3.68,
+ "learning_rate": "1.1974e-04",
+ "loss": 0.5541,
+ "slid_loss": 0.6182,
+ "step": 3832,
+ "time": 13.25
+ },
+ {
+ "epoch": 3.68,
+ "learning_rate": "1.1972e-04",
+ "loss": 0.6048,
+ "slid_loss": 0.6182,
+ "step": 3833,
+ "time": 13.36
+ },
+ {
+ "epoch": 3.68,
+ "learning_rate": "1.1969e-04",
+ "loss": 0.6216,
+ "slid_loss": 0.6184,
+ "step": 3834,
+ "time": 12.81
+ },
+ {
+ "epoch": 3.68,
+ "learning_rate": "1.1966e-04",
+ "loss": 0.6296,
+ "slid_loss": 0.6186,
+ "step": 3835,
+ "time": 12.91
+ },
+ {
+ "epoch": 3.68,
+ "learning_rate": "1.1964e-04",
+ "loss": 0.6122,
+ "slid_loss": 0.6182,
+ "step": 3836,
+ "time": 12.87
+ },
+ {
+ "epoch": 3.69,
+ "learning_rate": "1.1961e-04",
+ "loss": 0.5777,
+ "slid_loss": 0.6179,
+ "step": 3837,
+ "time": 13.74
+ },
+ {
+ "epoch": 3.69,
+ "learning_rate": "1.1958e-04",
+ "loss": 0.5797,
+ "slid_loss": 0.6169,
+ "step": 3838,
+ "time": 13.65
+ },
+ {
+ "epoch": 3.69,
+ "learning_rate": "1.1956e-04",
+ "loss": 0.6241,
+ "slid_loss": 0.6177,
+ "step": 3839,
+ "time": 13.42
+ },
+ {
+ "epoch": 3.69,
+ "learning_rate": "1.1953e-04",
+ "loss": 0.6271,
+ "slid_loss": 0.6177,
+ "step": 3840,
+ "time": 12.35
+ },
+ {
+ "epoch": 3.69,
+ "learning_rate": "1.1950e-04",
+ "loss": 0.6186,
+ "slid_loss": 0.6169,
+ "step": 3841,
+ "time": 13.46
+ },
+ {
+ "epoch": 3.69,
+ "learning_rate": "1.1948e-04",
+ "loss": 0.6126,
+ "slid_loss": 0.6176,
+ "step": 3842,
+ "time": 12.69
+ },
+ {
+ "epoch": 3.69,
+ "learning_rate": "1.1945e-04",
+ "loss": 0.6026,
+ "slid_loss": 0.6168,
+ "step": 3843,
+ "time": 12.92
+ },
+ {
+ "epoch": 3.69,
+ "learning_rate": "1.1942e-04",
+ "loss": 0.5642,
+ "slid_loss": 0.6165,
+ "step": 3844,
+ "time": 13.88
+ },
+ {
+ "epoch": 3.69,
+ "learning_rate": "1.1940e-04",
+ "loss": 0.5821,
+ "slid_loss": 0.6162,
+ "step": 3845,
+ "time": 12.43
+ },
+ {
+ "epoch": 3.69,
+ "learning_rate": "1.1937e-04",
+ "loss": 0.5647,
+ "slid_loss": 0.6152,
+ "step": 3846,
+ "time": 13.91
+ },
+ {
+ "epoch": 3.7,
+ "learning_rate": "1.1935e-04",
+ "loss": 0.5998,
+ "slid_loss": 0.6148,
+ "step": 3847,
+ "time": 11.46
+ },
+ {
+ "epoch": 3.7,
+ "learning_rate": "1.1932e-04",
+ "loss": 0.557,
+ "slid_loss": 0.6134,
+ "step": 3848,
+ "time": 13.48
+ },
+ {
+ "epoch": 3.7,
+ "learning_rate": "1.1929e-04",
+ "loss": 0.538,
+ "slid_loss": 0.6128,
+ "step": 3849,
+ "time": 11.36
+ },
+ {
+ "epoch": 3.7,
+ "learning_rate": "1.1927e-04",
+ "loss": 0.5282,
+ "slid_loss": 0.612,
+ "step": 3850,
+ "time": 12.6
+ },
+ {
+ "epoch": 3.7,
+ "learning_rate": "1.1924e-04",
+ "loss": 0.6106,
+ "slid_loss": 0.6119,
+ "step": 3851,
+ "time": 12.93
+ },
+ {
+ "epoch": 3.7,
+ "learning_rate": "1.1921e-04",
+ "loss": 0.6449,
+ "slid_loss": 0.6109,
+ "step": 3852,
+ "time": 11.75
+ },
+ {
+ "epoch": 3.7,
+ "learning_rate": "1.1919e-04",
+ "loss": 0.5999,
+ "slid_loss": 0.6101,
+ "step": 3853,
+ "time": 12.91
+ },
+ {
+ "epoch": 3.7,
+ "learning_rate": "1.1916e-04",
+ "loss": 0.601,
+ "slid_loss": 0.6103,
+ "step": 3854,
+ "time": 13.58
+ },
+ {
+ "epoch": 3.7,
+ "learning_rate": "1.1913e-04",
+ "loss": 0.689,
+ "slid_loss": 0.6098,
+ "step": 3855,
+ "time": 13.68
+ },
+ {
+ "epoch": 3.7,
+ "learning_rate": "1.1911e-04",
+ "loss": 0.5775,
+ "slid_loss": 0.6099,
+ "step": 3856,
+ "time": 12.82
+ },
+ {
+ "epoch": 3.71,
+ "learning_rate": "1.1908e-04",
+ "loss": 0.6907,
+ "slid_loss": 0.6102,
+ "step": 3857,
+ "time": 13.47
+ },
+ {
+ "epoch": 3.71,
+ "learning_rate": "1.1905e-04",
+ "loss": 0.7058,
+ "slid_loss": 0.6115,
+ "step": 3858,
+ "time": 12.41
+ },
+ {
+ "epoch": 3.71,
+ "learning_rate": "1.1903e-04",
+ "loss": 0.6423,
+ "slid_loss": 0.6118,
+ "step": 3859,
+ "time": 13.62
+ },
+ {
+ "epoch": 3.71,
+ "learning_rate": "1.1900e-04",
+ "loss": 0.6328,
+ "slid_loss": 0.6113,
+ "step": 3860,
+ "time": 11.6
+ },
+ {
+ "epoch": 3.71,
+ "learning_rate": "1.1898e-04",
+ "loss": 0.6567,
+ "slid_loss": 0.6115,
+ "step": 3861,
+ "time": 13.83
+ },
+ {
+ "epoch": 3.71,
+ "learning_rate": "1.1895e-04",
+ "loss": 0.656,
+ "slid_loss": 0.6129,
+ "step": 3862,
+ "time": 12.59
+ },
+ {
+ "epoch": 3.71,
+ "learning_rate": "1.1892e-04",
+ "loss": 0.7047,
+ "slid_loss": 0.6138,
+ "step": 3863,
+ "time": 13.99
+ },
+ {
+ "epoch": 3.71,
+ "learning_rate": "1.1890e-04",
+ "loss": 0.6702,
+ "slid_loss": 0.6144,
+ "step": 3864,
+ "time": 12.62
+ },
+ {
+ "epoch": 3.71,
+ "learning_rate": "1.1887e-04",
+ "loss": 0.5953,
+ "slid_loss": 0.6145,
+ "step": 3865,
+ "time": 13.82
+ },
+ {
+ "epoch": 3.71,
+ "learning_rate": "1.1884e-04",
+ "loss": 0.6306,
+ "slid_loss": 0.6149,
+ "step": 3866,
+ "time": 13.22
+ },
+ {
+ "epoch": 3.71,
+ "learning_rate": "1.1882e-04",
+ "loss": 0.6205,
+ "slid_loss": 0.6147,
+ "step": 3867,
+ "time": 14.19
+ },
+ {
+ "epoch": 3.72,
+ "learning_rate": "1.1879e-04",
+ "loss": 0.6103,
+ "slid_loss": 0.6145,
+ "step": 3868,
+ "time": 13.46
+ },
+ {
+ "epoch": 3.72,
+ "learning_rate": "1.1877e-04",
+ "loss": 0.6188,
+ "slid_loss": 0.6149,
+ "step": 3869,
+ "time": 13.16
+ },
+ {
+ "epoch": 3.72,
+ "learning_rate": "1.1874e-04",
+ "loss": 0.6661,
+ "slid_loss": 0.6164,
+ "step": 3870,
+ "time": 12.84
+ },
+ {
+ "epoch": 3.72,
+ "learning_rate": "1.1871e-04",
+ "loss": 0.6449,
+ "slid_loss": 0.6172,
+ "step": 3871,
+ "time": 13.93
+ },
+ {
+ "epoch": 3.72,
+ "learning_rate": "1.1869e-04",
+ "loss": 0.5969,
+ "slid_loss": 0.6177,
+ "step": 3872,
+ "time": 13.23
+ },
+ {
+ "epoch": 3.72,
+ "learning_rate": "1.1866e-04",
+ "loss": 0.608,
+ "slid_loss": 0.6183,
+ "step": 3873,
+ "time": 13.75
+ },
+ {
+ "epoch": 3.72,
+ "learning_rate": "1.1863e-04",
+ "loss": 0.6065,
+ "slid_loss": 0.6176,
+ "step": 3874,
+ "time": 14.18
+ },
+ {
+ "epoch": 3.72,
+ "learning_rate": "1.1861e-04",
+ "loss": 0.511,
+ "slid_loss": 0.6158,
+ "step": 3875,
+ "time": 13.18
+ },
+ {
+ "epoch": 3.72,
+ "learning_rate": "1.1858e-04",
+ "loss": 0.6424,
+ "slid_loss": 0.6162,
+ "step": 3876,
+ "time": 13.01
+ },
+ {
+ "epoch": 3.72,
+ "learning_rate": "1.1856e-04",
+ "loss": 0.6356,
+ "slid_loss": 0.6161,
+ "step": 3877,
+ "time": 13.11
+ },
+ {
+ "epoch": 3.73,
+ "learning_rate": "1.1853e-04",
+ "loss": 0.6643,
+ "slid_loss": 0.6167,
+ "step": 3878,
+ "time": 13.26
+ },
+ {
+ "epoch": 3.73,
+ "learning_rate": "1.1850e-04",
+ "loss": 0.6946,
+ "slid_loss": 0.6172,
+ "step": 3879,
+ "time": 12.43
+ },
+ {
+ "epoch": 3.73,
+ "learning_rate": "1.1848e-04",
+ "loss": 0.606,
+ "slid_loss": 0.6182,
+ "step": 3880,
+ "time": 10.94
+ },
+ {
+ "epoch": 3.73,
+ "learning_rate": "1.1845e-04",
+ "loss": 0.5943,
+ "slid_loss": 0.6186,
+ "step": 3881,
+ "time": 12.75
+ },
+ {
+ "epoch": 3.73,
+ "learning_rate": "1.1843e-04",
+ "loss": 0.6546,
+ "slid_loss": 0.6185,
+ "step": 3882,
+ "time": 13.31
+ },
+ {
+ "epoch": 3.73,
+ "learning_rate": "1.1840e-04",
+ "loss": 0.6581,
+ "slid_loss": 0.6186,
+ "step": 3883,
+ "time": 13.33
+ },
+ {
+ "epoch": 3.73,
+ "learning_rate": "1.1837e-04",
+ "loss": 0.5913,
+ "slid_loss": 0.6178,
+ "step": 3884,
+ "time": 13.98
+ },
+ {
+ "epoch": 3.73,
+ "learning_rate": "1.1835e-04",
+ "loss": 0.6954,
+ "slid_loss": 0.6182,
+ "step": 3885,
+ "time": 12.03
+ },
+ {
+ "epoch": 3.73,
+ "learning_rate": "1.1832e-04",
+ "loss": 0.6381,
+ "slid_loss": 0.6178,
+ "step": 3886,
+ "time": 13.3
+ },
+ {
+ "epoch": 3.73,
+ "learning_rate": "1.1830e-04",
+ "loss": 0.6418,
+ "slid_loss": 0.6169,
+ "step": 3887,
+ "time": 13.68
+ },
+ {
+ "epoch": 3.73,
+ "learning_rate": "1.1827e-04",
+ "loss": 0.6672,
+ "slid_loss": 0.6167,
+ "step": 3888,
+ "time": 13.21
+ },
+ {
+ "epoch": 3.74,
+ "learning_rate": "1.1824e-04",
+ "loss": 0.7444,
+ "slid_loss": 0.618,
+ "step": 3889,
+ "time": 11.97
+ },
+ {
+ "epoch": 3.74,
+ "learning_rate": "1.1822e-04",
+ "loss": 0.5814,
+ "slid_loss": 0.6189,
+ "step": 3890,
+ "time": 13.69
+ },
+ {
+ "epoch": 3.74,
+ "learning_rate": "1.1819e-04",
+ "loss": 0.6276,
+ "slid_loss": 0.6188,
+ "step": 3891,
+ "time": 12.82
+ },
+ {
+ "epoch": 3.74,
+ "learning_rate": "1.1817e-04",
+ "loss": 0.69,
+ "slid_loss": 0.6193,
+ "step": 3892,
+ "time": 14.09
+ },
+ {
+ "epoch": 3.74,
+ "learning_rate": "1.1814e-04",
+ "loss": 0.6502,
+ "slid_loss": 0.6197,
+ "step": 3893,
+ "time": 12.93
+ },
+ {
+ "epoch": 3.74,
+ "learning_rate": "1.1812e-04",
+ "loss": 0.5799,
+ "slid_loss": 0.6193,
+ "step": 3894,
+ "time": 13.22
+ },
+ {
+ "epoch": 3.74,
+ "learning_rate": "1.1809e-04",
+ "loss": 0.6028,
+ "slid_loss": 0.6198,
+ "step": 3895,
+ "time": 13.53
+ },
+ {
+ "epoch": 3.74,
+ "learning_rate": "1.1806e-04",
+ "loss": 0.5532,
+ "slid_loss": 0.6182,
+ "step": 3896,
+ "time": 12.86
+ },
+ {
+ "epoch": 3.74,
+ "learning_rate": "1.1804e-04",
+ "loss": 0.6812,
+ "slid_loss": 0.6187,
+ "step": 3897,
+ "time": 13.4
+ },
+ {
+ "epoch": 3.74,
+ "learning_rate": "1.1801e-04",
+ "loss": 0.5525,
+ "slid_loss": 0.6177,
+ "step": 3898,
+ "time": 12.98
+ },
+ {
+ "epoch": 3.75,
+ "learning_rate": "1.1799e-04",
+ "loss": 0.623,
+ "slid_loss": 0.618,
+ "step": 3899,
+ "time": 12.82
+ },
+ {
+ "epoch": 3.75,
+ "learning_rate": "1.1796e-04",
+ "loss": 0.6304,
+ "slid_loss": 0.6171,
+ "step": 3900,
+ "time": 13.39
+ },
+ {
+ "epoch": 3.75,
+ "learning_rate": "1.1793e-04",
+ "loss": 0.6627,
+ "slid_loss": 0.6173,
+ "step": 3901,
+ "time": 12.05
+ },
+ {
+ "epoch": 3.75,
+ "learning_rate": "1.1791e-04",
+ "loss": 0.6577,
+ "slid_loss": 0.6178,
+ "step": 3902,
+ "time": 14.26
+ },
+ {
+ "epoch": 3.75,
+ "learning_rate": "1.1788e-04",
+ "loss": 0.6943,
+ "slid_loss": 0.6192,
+ "step": 3903,
+ "time": 11.0
+ },
+ {
+ "epoch": 3.75,
+ "learning_rate": "1.1786e-04",
+ "loss": 0.6034,
+ "slid_loss": 0.6194,
+ "step": 3904,
+ "time": 13.85
+ },
+ {
+ "epoch": 3.75,
+ "learning_rate": "1.1783e-04",
+ "loss": 0.6199,
+ "slid_loss": 0.6195,
+ "step": 3905,
+ "time": 13.59
+ },
+ {
+ "epoch": 3.75,
+ "learning_rate": "1.1781e-04",
+ "loss": 0.5759,
+ "slid_loss": 0.618,
+ "step": 3906,
+ "time": 12.97
+ },
+ {
+ "epoch": 3.75,
+ "learning_rate": "1.1778e-04",
+ "loss": 0.5461,
+ "slid_loss": 0.6168,
+ "step": 3907,
+ "time": 12.66
+ },
+ {
+ "epoch": 3.75,
+ "learning_rate": "1.1776e-04",
+ "loss": 0.6418,
+ "slid_loss": 0.6174,
+ "step": 3908,
+ "time": 13.64
+ },
+ {
+ "epoch": 3.76,
+ "learning_rate": "1.1773e-04",
+ "loss": 0.5691,
+ "slid_loss": 0.617,
+ "step": 3909,
+ "time": 13.33
+ },
+ {
+ "epoch": 3.76,
+ "learning_rate": "1.1770e-04",
+ "loss": 0.6176,
+ "slid_loss": 0.6169,
+ "step": 3910,
+ "time": 13.49
+ },
+ {
+ "epoch": 3.76,
+ "learning_rate": "1.1768e-04",
+ "loss": 0.6121,
+ "slid_loss": 0.6174,
+ "step": 3911,
+ "time": 12.96
+ },
+ {
+ "epoch": 3.76,
+ "learning_rate": "1.1765e-04",
+ "loss": 0.6318,
+ "slid_loss": 0.618,
+ "step": 3912,
+ "time": 14.11
+ },
+ {
+ "epoch": 3.76,
+ "learning_rate": "1.1763e-04",
+ "loss": 0.6085,
+ "slid_loss": 0.6183,
+ "step": 3913,
+ "time": 12.4
+ },
+ {
+ "epoch": 3.76,
+ "learning_rate": "1.1760e-04",
+ "loss": 0.5625,
+ "slid_loss": 0.618,
+ "step": 3914,
+ "time": 13.28
+ },
+ {
+ "epoch": 3.76,
+ "learning_rate": "1.1758e-04",
+ "loss": 0.5731,
+ "slid_loss": 0.617,
+ "step": 3915,
+ "time": 13.51
+ },
+ {
+ "epoch": 3.76,
+ "learning_rate": "1.1755e-04",
+ "loss": 0.5926,
+ "slid_loss": 0.6171,
+ "step": 3916,
+ "time": 14.1
+ },
+ {
+ "epoch": 3.76,
+ "learning_rate": "1.1753e-04",
+ "loss": 0.6802,
+ "slid_loss": 0.6174,
+ "step": 3917,
+ "time": 13.8
+ },
+ {
+ "epoch": 3.76,
+ "learning_rate": "1.1750e-04",
+ "loss": 0.5793,
+ "slid_loss": 0.6168,
+ "step": 3918,
+ "time": 11.23
+ },
+ {
+ "epoch": 3.76,
+ "learning_rate": "1.1747e-04",
+ "loss": 0.6132,
+ "slid_loss": 0.6168,
+ "step": 3919,
+ "time": 14.39
+ },
+ {
+ "epoch": 3.77,
+ "learning_rate": "1.1745e-04",
+ "loss": 0.6688,
+ "slid_loss": 0.6176,
+ "step": 3920,
+ "time": 12.11
+ },
+ {
+ "epoch": 3.77,
+ "learning_rate": "1.1742e-04",
+ "loss": 0.6304,
+ "slid_loss": 0.6178,
+ "step": 3921,
+ "time": 14.28
+ },
+ {
+ "epoch": 3.77,
+ "learning_rate": "1.1740e-04",
+ "loss": 0.6145,
+ "slid_loss": 0.6183,
+ "step": 3922,
+ "time": 13.32
+ },
+ {
+ "epoch": 3.77,
+ "learning_rate": "1.1737e-04",
+ "loss": 0.5607,
+ "slid_loss": 0.6184,
+ "step": 3923,
+ "time": 13.37
+ },
+ {
+ "epoch": 3.77,
+ "learning_rate": "1.1735e-04",
+ "loss": 0.6,
+ "slid_loss": 0.6185,
+ "step": 3924,
+ "time": 13.92
+ },
+ {
+ "epoch": 3.77,
+ "learning_rate": "1.1732e-04",
+ "loss": 0.6452,
+ "slid_loss": 0.6184,
+ "step": 3925,
+ "time": 12.34
+ },
+ {
+ "epoch": 3.77,
+ "learning_rate": "1.1730e-04",
+ "loss": 0.577,
+ "slid_loss": 0.618,
+ "step": 3926,
+ "time": 11.86
+ },
+ {
+ "epoch": 3.77,
+ "learning_rate": "1.1727e-04",
+ "loss": 0.6416,
+ "slid_loss": 0.6177,
+ "step": 3927,
+ "time": 11.12
+ },
+ {
+ "epoch": 3.77,
+ "learning_rate": "1.1725e-04",
+ "loss": 0.6312,
+ "slid_loss": 0.6189,
+ "step": 3928,
+ "time": 13.29
+ },
+ {
+ "epoch": 3.77,
+ "learning_rate": "1.1722e-04",
+ "loss": 0.584,
+ "slid_loss": 0.6194,
+ "step": 3929,
+ "time": 13.23
+ },
+ {
+ "epoch": 3.78,
+ "learning_rate": "1.1719e-04",
+ "loss": 0.4918,
+ "slid_loss": 0.6183,
+ "step": 3930,
+ "time": 13.21
+ },
+ {
+ "epoch": 3.78,
+ "learning_rate": "1.1717e-04",
+ "loss": 0.6898,
+ "slid_loss": 0.6187,
+ "step": 3931,
+ "time": 12.43
+ },
+ {
+ "epoch": 3.78,
+ "learning_rate": "1.1714e-04",
+ "loss": 0.6622,
+ "slid_loss": 0.6197,
+ "step": 3932,
+ "time": 12.29
+ },
+ {
+ "epoch": 3.78,
+ "learning_rate": "1.1712e-04",
+ "loss": 0.6442,
+ "slid_loss": 0.6201,
+ "step": 3933,
+ "time": 13.66
+ },
+ {
+ "epoch": 3.78,
+ "learning_rate": "1.1709e-04",
+ "loss": 0.5799,
+ "slid_loss": 0.6197,
+ "step": 3934,
+ "time": 14.19
+ },
+ {
+ "epoch": 3.78,
+ "learning_rate": "1.1707e-04",
+ "loss": 0.565,
+ "slid_loss": 0.6191,
+ "step": 3935,
+ "time": 13.75
+ },
+ {
+ "epoch": 3.78,
+ "learning_rate": "1.1704e-04",
+ "loss": 0.6401,
+ "slid_loss": 0.6194,
+ "step": 3936,
+ "time": 14.02
+ },
+ {
+ "epoch": 3.78,
+ "learning_rate": "1.1702e-04",
+ "loss": 0.67,
+ "slid_loss": 0.6203,
+ "step": 3937,
+ "time": 13.87
+ },
+ {
+ "epoch": 3.78,
+ "learning_rate": "1.1699e-04",
+ "loss": 0.6133,
+ "slid_loss": 0.6206,
+ "step": 3938,
+ "time": 14.06
+ },
+ {
+ "epoch": 3.78,
+ "learning_rate": "1.1697e-04",
+ "loss": 0.5876,
+ "slid_loss": 0.6203,
+ "step": 3939,
+ "time": 14.0
+ },
+ {
+ "epoch": 3.78,
+ "learning_rate": "1.1694e-04",
+ "loss": 0.5145,
+ "slid_loss": 0.6191,
+ "step": 3940,
+ "time": 13.32
+ },
+ {
+ "epoch": 3.79,
+ "learning_rate": "1.1692e-04",
+ "loss": 0.5337,
+ "slid_loss": 0.6183,
+ "step": 3941,
+ "time": 13.12
+ },
+ {
+ "epoch": 3.79,
+ "learning_rate": "1.1689e-04",
+ "loss": 0.5913,
+ "slid_loss": 0.6181,
+ "step": 3942,
+ "time": 13.71
+ },
+ {
+ "epoch": 3.79,
+ "learning_rate": "1.1687e-04",
+ "loss": 0.547,
+ "slid_loss": 0.6175,
+ "step": 3943,
+ "time": 13.22
+ },
+ {
+ "epoch": 3.79,
+ "learning_rate": "1.1684e-04",
+ "loss": 0.6322,
+ "slid_loss": 0.6182,
+ "step": 3944,
+ "time": 13.11
+ },
+ {
+ "epoch": 3.79,
+ "learning_rate": "1.1682e-04",
+ "loss": 0.5544,
+ "slid_loss": 0.6179,
+ "step": 3945,
+ "time": 13.65
+ },
+ {
+ "epoch": 3.79,
+ "learning_rate": "1.1679e-04",
+ "loss": 0.5796,
+ "slid_loss": 0.6181,
+ "step": 3946,
+ "time": 12.79
+ },
+ {
+ "epoch": 3.79,
+ "learning_rate": "1.1677e-04",
+ "loss": 0.6877,
+ "slid_loss": 0.6189,
+ "step": 3947,
+ "time": 13.68
+ },
+ {
+ "epoch": 3.79,
+ "learning_rate": "1.1674e-04",
+ "loss": 0.5141,
+ "slid_loss": 0.6185,
+ "step": 3948,
+ "time": 12.77
+ },
+ {
+ "epoch": 3.79,
+ "learning_rate": "1.1672e-04",
+ "loss": 0.5914,
+ "slid_loss": 0.619,
+ "step": 3949,
+ "time": 12.28
+ },
+ {
+ "epoch": 3.79,
+ "learning_rate": "1.1669e-04",
+ "loss": 0.5865,
+ "slid_loss": 0.6196,
+ "step": 3950,
+ "time": 13.48
+ },
+ {
+ "epoch": 3.8,
+ "learning_rate": "1.1667e-04",
+ "loss": 0.5657,
+ "slid_loss": 0.6192,
+ "step": 3951,
+ "time": 13.54
+ },
+ {
+ "epoch": 3.8,
+ "learning_rate": "1.1664e-04",
+ "loss": 0.5483,
+ "slid_loss": 0.6182,
+ "step": 3952,
+ "time": 13.23
+ },
+ {
+ "epoch": 3.8,
+ "learning_rate": "1.1662e-04",
+ "loss": 0.5609,
+ "slid_loss": 0.6178,
+ "step": 3953,
+ "time": 13.32
+ },
+ {
+ "epoch": 3.8,
+ "learning_rate": "1.1659e-04",
+ "loss": 0.573,
+ "slid_loss": 0.6175,
+ "step": 3954,
+ "time": 11.47
+ },
+ {
+ "epoch": 3.8,
+ "learning_rate": "1.1657e-04",
+ "loss": 0.6303,
+ "slid_loss": 0.617,
+ "step": 3955,
+ "time": 13.31
+ },
+ {
+ "epoch": 3.8,
+ "learning_rate": "1.1654e-04",
+ "loss": 0.6452,
+ "slid_loss": 0.6176,
+ "step": 3956,
+ "time": 13.38
+ },
+ {
+ "epoch": 3.8,
+ "learning_rate": "1.1652e-04",
+ "loss": 0.551,
+ "slid_loss": 0.6162,
+ "step": 3957,
+ "time": 12.05
+ },
+ {
+ "epoch": 3.8,
+ "learning_rate": "1.1649e-04",
+ "loss": 0.5349,
+ "slid_loss": 0.6145,
+ "step": 3958,
+ "time": 13.73
+ },
+ {
+ "epoch": 3.8,
+ "learning_rate": "1.1647e-04",
+ "loss": 0.697,
+ "slid_loss": 0.6151,
+ "step": 3959,
+ "time": 13.55
+ },
+ {
+ "epoch": 3.8,
+ "learning_rate": "1.1644e-04",
+ "loss": 0.6113,
+ "slid_loss": 0.6149,
+ "step": 3960,
+ "time": 13.88
+ },
+ {
+ "epoch": 3.8,
+ "learning_rate": "1.1642e-04",
+ "loss": 0.607,
+ "slid_loss": 0.6144,
+ "step": 3961,
+ "time": 12.11
+ },
+ {
+ "epoch": 3.81,
+ "learning_rate": "1.1639e-04",
+ "loss": 0.6524,
+ "slid_loss": 0.6143,
+ "step": 3962,
+ "time": 13.7
+ },
+ {
+ "epoch": 3.81,
+ "learning_rate": "1.1637e-04",
+ "loss": 0.6533,
+ "slid_loss": 0.6138,
+ "step": 3963,
+ "time": 11.73
+ },
+ {
+ "epoch": 3.81,
+ "learning_rate": "1.1634e-04",
+ "loss": 0.5206,
+ "slid_loss": 0.6123,
+ "step": 3964,
+ "time": 13.72
+ },
+ {
+ "epoch": 3.81,
+ "learning_rate": "1.1632e-04",
+ "loss": 0.5989,
+ "slid_loss": 0.6124,
+ "step": 3965,
+ "time": 13.21
+ },
+ {
+ "epoch": 3.81,
+ "learning_rate": "1.1629e-04",
+ "loss": 0.5634,
+ "slid_loss": 0.6117,
+ "step": 3966,
+ "time": 11.34
+ },
+ {
+ "epoch": 3.81,
+ "learning_rate": "1.1627e-04",
+ "loss": 0.6446,
+ "slid_loss": 0.6119,
+ "step": 3967,
+ "time": 13.25
+ },
+ {
+ "epoch": 3.81,
+ "learning_rate": "1.1624e-04",
+ "loss": 0.5039,
+ "slid_loss": 0.6109,
+ "step": 3968,
+ "time": 13.43
+ },
+ {
+ "epoch": 3.81,
+ "learning_rate": "1.1622e-04",
+ "loss": 0.6873,
+ "slid_loss": 0.6115,
+ "step": 3969,
+ "time": 13.66
+ },
+ {
+ "epoch": 3.81,
+ "learning_rate": "1.1619e-04",
+ "loss": 0.6717,
+ "slid_loss": 0.6116,
+ "step": 3970,
+ "time": 14.38
+ },
+ {
+ "epoch": 3.81,
+ "learning_rate": "1.1617e-04",
+ "loss": 0.5589,
+ "slid_loss": 0.6107,
+ "step": 3971,
+ "time": 14.27
+ },
+ {
+ "epoch": 3.82,
+ "learning_rate": "1.1615e-04",
+ "loss": 0.6608,
+ "slid_loss": 0.6114,
+ "step": 3972,
+ "time": 13.57
+ },
+ {
+ "epoch": 3.82,
+ "learning_rate": "1.1612e-04",
+ "loss": 0.5427,
+ "slid_loss": 0.6107,
+ "step": 3973,
+ "time": 13.63
+ },
+ {
+ "epoch": 3.82,
+ "learning_rate": "1.1610e-04",
+ "loss": 0.6012,
+ "slid_loss": 0.6107,
+ "step": 3974,
+ "time": 12.18
+ },
+ {
+ "epoch": 3.82,
+ "learning_rate": "1.1607e-04",
+ "loss": 0.6929,
+ "slid_loss": 0.6125,
+ "step": 3975,
+ "time": 14.38
+ },
+ {
+ "epoch": 3.82,
+ "learning_rate": "1.1605e-04",
+ "loss": 0.572,
+ "slid_loss": 0.6118,
+ "step": 3976,
+ "time": 13.57
+ },
+ {
+ "epoch": 3.82,
+ "learning_rate": "1.1602e-04",
+ "loss": 0.5907,
+ "slid_loss": 0.6113,
+ "step": 3977,
+ "time": 13.2
+ },
+ {
+ "epoch": 3.82,
+ "learning_rate": "1.1600e-04",
+ "loss": 0.6346,
+ "slid_loss": 0.611,
+ "step": 3978,
+ "time": 12.94
+ },
+ {
+ "epoch": 3.82,
+ "learning_rate": "1.1597e-04",
+ "loss": 0.6355,
+ "slid_loss": 0.6105,
+ "step": 3979,
+ "time": 13.38
+ },
+ {
+ "epoch": 3.82,
+ "learning_rate": "1.1595e-04",
+ "loss": 0.6397,
+ "slid_loss": 0.6108,
+ "step": 3980,
+ "time": 13.4
+ },
+ {
+ "epoch": 3.82,
+ "learning_rate": "1.1592e-04",
+ "loss": 0.5998,
+ "slid_loss": 0.6108,
+ "step": 3981,
+ "time": 13.38
+ },
+ {
+ "epoch": 3.83,
+ "learning_rate": "1.1590e-04",
+ "loss": 0.5748,
+ "slid_loss": 0.61,
+ "step": 3982,
+ "time": 13.39
+ },
+ {
+ "epoch": 3.83,
+ "learning_rate": "1.1587e-04",
+ "loss": 0.6448,
+ "slid_loss": 0.6099,
+ "step": 3983,
+ "time": 13.32
+ },
+ {
+ "epoch": 3.83,
+ "learning_rate": "1.1585e-04",
+ "loss": 0.6706,
+ "slid_loss": 0.6107,
+ "step": 3984,
+ "time": 13.15
+ },
+ {
+ "epoch": 3.83,
+ "learning_rate": "1.1583e-04",
+ "loss": 0.5662,
+ "slid_loss": 0.6094,
+ "step": 3985,
+ "time": 14.15
+ },
+ {
+ "epoch": 3.83,
+ "learning_rate": "1.1580e-04",
+ "loss": 0.6456,
+ "slid_loss": 0.6095,
+ "step": 3986,
+ "time": 13.34
+ },
+ {
+ "epoch": 3.83,
+ "learning_rate": "1.1578e-04",
+ "loss": 0.6174,
+ "slid_loss": 0.6092,
+ "step": 3987,
+ "time": 13.38
+ },
+ {
+ "epoch": 3.83,
+ "learning_rate": "1.1575e-04",
+ "loss": 0.5909,
+ "slid_loss": 0.6085,
+ "step": 3988,
+ "time": 12.83
+ },
+ {
+ "epoch": 3.83,
+ "learning_rate": "1.1573e-04",
+ "loss": 0.5482,
+ "slid_loss": 0.6065,
+ "step": 3989,
+ "time": 13.2
+ },
+ {
+ "epoch": 3.83,
+ "learning_rate": "1.1570e-04",
+ "loss": 0.5433,
+ "slid_loss": 0.6061,
+ "step": 3990,
+ "time": 13.64
+ },
+ {
+ "epoch": 3.83,
+ "learning_rate": "1.1568e-04",
+ "loss": 0.6493,
+ "slid_loss": 0.6064,
+ "step": 3991,
+ "time": 13.01
+ },
+ {
+ "epoch": 3.83,
+ "learning_rate": "1.1565e-04",
+ "loss": 0.6007,
+ "slid_loss": 0.6055,
+ "step": 3992,
+ "time": 13.47
+ },
+ {
+ "epoch": 3.84,
+ "learning_rate": "1.1563e-04",
+ "loss": 0.5374,
+ "slid_loss": 0.6043,
+ "step": 3993,
+ "time": 13.64
+ },
+ {
+ "epoch": 3.84,
+ "learning_rate": "1.1561e-04",
+ "loss": 0.6201,
+ "slid_loss": 0.6047,
+ "step": 3994,
+ "time": 12.09
+ },
+ {
+ "epoch": 3.84,
+ "learning_rate": "1.1558e-04",
+ "loss": 0.6344,
+ "slid_loss": 0.6051,
+ "step": 3995,
+ "time": 12.35
+ },
+ {
+ "epoch": 3.84,
+ "learning_rate": "1.1556e-04",
+ "loss": 0.6361,
+ "slid_loss": 0.6059,
+ "step": 3996,
+ "time": 13.45
+ },
+ {
+ "epoch": 3.84,
+ "learning_rate": "1.1553e-04",
+ "loss": 0.5547,
+ "slid_loss": 0.6046,
+ "step": 3997,
+ "time": 13.43
+ },
+ {
+ "epoch": 3.84,
+ "learning_rate": "1.1551e-04",
+ "loss": 0.5554,
+ "slid_loss": 0.6046,
+ "step": 3998,
+ "time": 14.22
+ },
+ {
+ "epoch": 3.84,
+ "learning_rate": "1.1548e-04",
+ "loss": 0.5539,
+ "slid_loss": 0.604,
+ "step": 3999,
+ "time": 13.39
+ },
+ {
+ "epoch": 3.84,
+ "learning_rate": "1.1546e-04",
+ "loss": 0.5941,
+ "slid_loss": 0.6036,
+ "step": 4000,
+ "time": 13.61
+ },
+ {
+ "epoch": 3.84,
+ "learning_rate": "1.1544e-04",
+ "loss": 0.5709,
+ "slid_loss": 0.6027,
+ "step": 4001,
+ "time": 13.73
+ },
+ {
+ "epoch": 3.84,
+ "learning_rate": "1.1541e-04",
+ "loss": 0.6868,
+ "slid_loss": 0.603,
+ "step": 4002,
+ "time": 13.23
+ },
+ {
+ "epoch": 3.85,
+ "learning_rate": "1.1539e-04",
+ "loss": 0.5791,
+ "slid_loss": 0.6018,
+ "step": 4003,
+ "time": 13.23
+ },
+ {
+ "epoch": 3.85,
+ "learning_rate": "1.1536e-04",
+ "loss": 0.5463,
+ "slid_loss": 0.6012,
+ "step": 4004,
+ "time": 11.23
+ },
+ {
+ "epoch": 3.85,
+ "learning_rate": "1.1534e-04",
+ "loss": 0.5575,
+ "slid_loss": 0.6006,
+ "step": 4005,
+ "time": 13.64
+ },
+ {
+ "epoch": 3.85,
+ "learning_rate": "1.1531e-04",
+ "loss": 0.5689,
+ "slid_loss": 0.6005,
+ "step": 4006,
+ "time": 11.56
+ },
+ {
+ "epoch": 3.85,
+ "learning_rate": "1.1529e-04",
+ "loss": 0.4924,
+ "slid_loss": 0.6,
+ "step": 4007,
+ "time": 12.04
+ },
+ {
+ "epoch": 3.85,
+ "learning_rate": "1.1527e-04",
+ "loss": 0.7048,
+ "slid_loss": 0.6006,
+ "step": 4008,
+ "time": 13.35
+ },
+ {
+ "epoch": 3.85,
+ "learning_rate": "1.1524e-04",
+ "loss": 0.6141,
+ "slid_loss": 0.6011,
+ "step": 4009,
+ "time": 12.82
+ },
+ {
+ "epoch": 3.85,
+ "learning_rate": "1.1522e-04",
+ "loss": 0.5827,
+ "slid_loss": 0.6007,
+ "step": 4010,
+ "time": 13.26
+ },
+ {
+ "epoch": 3.85,
+ "learning_rate": "1.1519e-04",
+ "loss": 0.6003,
+ "slid_loss": 0.6006,
+ "step": 4011,
+ "time": 14.16
+ },
+ {
+ "epoch": 3.85,
+ "learning_rate": "1.1517e-04",
+ "loss": 0.6297,
+ "slid_loss": 0.6006,
+ "step": 4012,
+ "time": 13.15
+ },
+ {
+ "epoch": 3.85,
+ "learning_rate": "1.1515e-04",
+ "loss": 0.6269,
+ "slid_loss": 0.6008,
+ "step": 4013,
+ "time": 13.77
+ },
+ {
+ "epoch": 3.86,
+ "learning_rate": "1.1512e-04",
+ "loss": 0.6014,
+ "slid_loss": 0.6012,
+ "step": 4014,
+ "time": 13.13
+ },
+ {
+ "epoch": 3.86,
+ "learning_rate": "1.1510e-04",
+ "loss": 0.6625,
+ "slid_loss": 0.6021,
+ "step": 4015,
+ "time": 11.9
+ },
+ {
+ "epoch": 3.86,
+ "learning_rate": "1.1507e-04",
+ "loss": 0.6091,
+ "slid_loss": 0.6022,
+ "step": 4016,
+ "time": 13.21
+ },
+ {
+ "epoch": 3.86,
+ "learning_rate": "1.1505e-04",
+ "loss": 0.5482,
+ "slid_loss": 0.6009,
+ "step": 4017,
+ "time": 11.78
+ },
+ {
+ "epoch": 3.86,
+ "learning_rate": "1.1503e-04",
+ "loss": 0.5717,
+ "slid_loss": 0.6008,
+ "step": 4018,
+ "time": 13.78
+ },
+ {
+ "epoch": 3.86,
+ "learning_rate": "1.1500e-04",
+ "loss": 0.5935,
+ "slid_loss": 0.6006,
+ "step": 4019,
+ "time": 11.38
+ },
+ {
+ "epoch": 3.86,
+ "learning_rate": "1.1498e-04",
+ "loss": 0.5715,
+ "slid_loss": 0.5997,
+ "step": 4020,
+ "time": 13.87
+ },
+ {
+ "epoch": 3.86,
+ "learning_rate": "1.1495e-04",
+ "loss": 0.5362,
+ "slid_loss": 0.5987,
+ "step": 4021,
+ "time": 12.77
+ },
+ {
+ "epoch": 3.86,
+ "learning_rate": "1.1493e-04",
+ "loss": 0.5917,
+ "slid_loss": 0.5985,
+ "step": 4022,
+ "time": 12.45
+ },
+ {
+ "epoch": 3.86,
+ "learning_rate": "1.1491e-04",
+ "loss": 0.6247,
+ "slid_loss": 0.5991,
+ "step": 4023,
+ "time": 13.97
+ },
+ {
+ "epoch": 3.87,
+ "learning_rate": "1.1488e-04",
+ "loss": 0.6118,
+ "slid_loss": 0.5993,
+ "step": 4024,
+ "time": 11.36
+ },
+ {
+ "epoch": 3.87,
+ "learning_rate": "1.1486e-04",
+ "loss": 0.5927,
+ "slid_loss": 0.5987,
+ "step": 4025,
+ "time": 11.69
+ },
+ {
+ "epoch": 3.87,
+ "learning_rate": "1.1483e-04",
+ "loss": 0.5684,
+ "slid_loss": 0.5986,
+ "step": 4026,
+ "time": 13.57
+ },
+ {
+ "epoch": 3.87,
+ "learning_rate": "1.1481e-04",
+ "loss": 0.6039,
+ "slid_loss": 0.5983,
+ "step": 4027,
+ "time": 13.81
+ },
+ {
+ "epoch": 3.87,
+ "learning_rate": "1.1479e-04",
+ "loss": 0.5953,
+ "slid_loss": 0.5979,
+ "step": 4028,
+ "time": 11.92
+ },
+ {
+ "epoch": 3.87,
+ "learning_rate": "1.1476e-04",
+ "loss": 0.6248,
+ "slid_loss": 0.5983,
+ "step": 4029,
+ "time": 13.69
+ },
+ {
+ "epoch": 3.87,
+ "learning_rate": "1.1474e-04",
+ "loss": 0.5763,
+ "slid_loss": 0.5992,
+ "step": 4030,
+ "time": 11.51
+ },
+ {
+ "epoch": 3.87,
+ "learning_rate": "1.1472e-04",
+ "loss": 0.6379,
+ "slid_loss": 0.5986,
+ "step": 4031,
+ "time": 13.17
+ },
+ {
+ "epoch": 3.87,
+ "learning_rate": "1.1469e-04",
+ "loss": 0.6987,
+ "slid_loss": 0.599,
+ "step": 4032,
+ "time": 14.07
+ },
+ {
+ "epoch": 3.87,
+ "learning_rate": "1.1467e-04",
+ "loss": 0.5976,
+ "slid_loss": 0.5985,
+ "step": 4033,
+ "time": 14.13
+ },
+ {
+ "epoch": 3.88,
+ "learning_rate": "1.1464e-04",
+ "loss": 0.4649,
+ "slid_loss": 0.5974,
+ "step": 4034,
+ "time": 13.26
+ },
+ {
+ "epoch": 3.88,
+ "learning_rate": "1.1462e-04",
+ "loss": 0.6506,
+ "slid_loss": 0.5983,
+ "step": 4035,
+ "time": 11.66
+ },
+ {
+ "epoch": 3.88,
+ "learning_rate": "1.1460e-04",
+ "loss": 0.6715,
+ "slid_loss": 0.5986,
+ "step": 4036,
+ "time": 14.48
+ },
+ {
+ "epoch": 3.88,
+ "learning_rate": "1.1457e-04",
+ "loss": 0.6134,
+ "slid_loss": 0.598,
+ "step": 4037,
+ "time": 11.9
+ },
+ {
+ "epoch": 3.88,
+ "learning_rate": "1.1455e-04",
+ "loss": 0.6169,
+ "slid_loss": 0.598,
+ "step": 4038,
+ "time": 11.76
+ },
+ {
+ "epoch": 3.88,
+ "learning_rate": "1.1453e-04",
+ "loss": 0.5726,
+ "slid_loss": 0.5979,
+ "step": 4039,
+ "time": 11.18
+ },
+ {
+ "epoch": 3.88,
+ "learning_rate": "1.1450e-04",
+ "loss": 0.6319,
+ "slid_loss": 0.5991,
+ "step": 4040,
+ "time": 14.2
+ },
+ {
+ "epoch": 3.88,
+ "learning_rate": "1.1448e-04",
+ "loss": 0.595,
+ "slid_loss": 0.5997,
+ "step": 4041,
+ "time": 12.03
+ },
+ {
+ "epoch": 3.88,
+ "learning_rate": "1.1446e-04",
+ "loss": 0.5861,
+ "slid_loss": 0.5996,
+ "step": 4042,
+ "time": 13.66
+ },
+ {
+ "epoch": 3.88,
+ "learning_rate": "1.1443e-04",
+ "loss": 0.5523,
+ "slid_loss": 0.5997,
+ "step": 4043,
+ "time": 13.19
+ },
+ {
+ "epoch": 3.88,
+ "learning_rate": "1.1441e-04",
+ "loss": 0.5871,
+ "slid_loss": 0.5992,
+ "step": 4044,
+ "time": 12.68
+ },
+ {
+ "epoch": 3.89,
+ "learning_rate": "1.1438e-04",
+ "loss": 0.5741,
+ "slid_loss": 0.5994,
+ "step": 4045,
+ "time": 12.74
+ },
+ {
+ "epoch": 3.89,
+ "learning_rate": "1.1436e-04",
+ "loss": 0.6942,
+ "slid_loss": 0.6006,
+ "step": 4046,
+ "time": 13.95
+ },
+ {
+ "epoch": 3.89,
+ "learning_rate": "1.1434e-04",
+ "loss": 0.5799,
+ "slid_loss": 0.5995,
+ "step": 4047,
+ "time": 13.04
+ },
+ {
+ "epoch": 3.89,
+ "learning_rate": "1.1431e-04",
+ "loss": 0.6397,
+ "slid_loss": 0.6007,
+ "step": 4048,
+ "time": 10.81
+ },
+ {
+ "epoch": 3.89,
+ "learning_rate": "1.1429e-04",
+ "loss": 0.6026,
+ "slid_loss": 0.6009,
+ "step": 4049,
+ "time": 13.88
+ },
+ {
+ "epoch": 3.89,
+ "learning_rate": "1.1427e-04",
+ "loss": 0.5622,
+ "slid_loss": 0.6006,
+ "step": 4050,
+ "time": 13.3
+ },
+ {
+ "epoch": 3.89,
+ "learning_rate": "1.1424e-04",
+ "loss": 0.6563,
+ "slid_loss": 0.6015,
+ "step": 4051,
+ "time": 14.0
+ },
+ {
+ "epoch": 3.89,
+ "learning_rate": "1.1422e-04",
+ "loss": 0.5384,
+ "slid_loss": 0.6014,
+ "step": 4052,
+ "time": 12.55
+ },
+ {
+ "epoch": 3.89,
+ "learning_rate": "1.1420e-04",
+ "loss": 0.6316,
+ "slid_loss": 0.6021,
+ "step": 4053,
+ "time": 13.21
+ },
+ {
+ "epoch": 3.89,
+ "learning_rate": "1.1417e-04",
+ "loss": 0.5316,
+ "slid_loss": 0.6017,
+ "step": 4054,
+ "time": 12.44
+ },
+ {
+ "epoch": 3.9,
+ "learning_rate": "1.1415e-04",
+ "loss": 0.6343,
+ "slid_loss": 0.6018,
+ "step": 4055,
+ "time": 13.41
+ },
+ {
+ "epoch": 3.9,
+ "learning_rate": "1.1413e-04",
+ "loss": 0.4908,
+ "slid_loss": 0.6002,
+ "step": 4056,
+ "time": 13.5
+ },
+ {
+ "epoch": 3.9,
+ "learning_rate": "1.1410e-04",
+ "loss": 0.6749,
+ "slid_loss": 0.6014,
+ "step": 4057,
+ "time": 13.78
+ },
+ {
+ "epoch": 3.9,
+ "learning_rate": "1.1408e-04",
+ "loss": 0.6145,
+ "slid_loss": 0.6022,
+ "step": 4058,
+ "time": 13.94
+ },
+ {
+ "epoch": 3.9,
+ "learning_rate": "1.1406e-04",
+ "loss": 0.5594,
+ "slid_loss": 0.6009,
+ "step": 4059,
+ "time": 10.98
+ },
+ {
+ "epoch": 3.9,
+ "learning_rate": "1.1403e-04",
+ "loss": 0.614,
+ "slid_loss": 0.6009,
+ "step": 4060,
+ "time": 13.57
+ },
+ {
+ "epoch": 3.9,
+ "learning_rate": "1.1401e-04",
+ "loss": 0.5688,
+ "slid_loss": 0.6005,
+ "step": 4061,
+ "time": 11.28
+ },
+ {
+ "epoch": 3.9,
+ "learning_rate": "1.1399e-04",
+ "loss": 0.6423,
+ "slid_loss": 0.6004,
+ "step": 4062,
+ "time": 13.02
+ },
+ {
+ "epoch": 3.9,
+ "learning_rate": "1.1396e-04",
+ "loss": 0.5354,
+ "slid_loss": 0.5992,
+ "step": 4063,
+ "time": 12.79
+ },
+ {
+ "epoch": 3.9,
+ "learning_rate": "1.1394e-04",
+ "loss": 0.6376,
+ "slid_loss": 0.6004,
+ "step": 4064,
+ "time": 11.44
+ },
+ {
+ "epoch": 3.9,
+ "learning_rate": "1.1392e-04",
+ "loss": 0.6201,
+ "slid_loss": 0.6006,
+ "step": 4065,
+ "time": 13.74
+ },
+ {
+ "epoch": 3.91,
+ "learning_rate": "1.1389e-04",
+ "loss": 0.611,
+ "slid_loss": 0.6011,
+ "step": 4066,
+ "time": 13.34
+ },
+ {
+ "epoch": 3.91,
+ "learning_rate": "1.1387e-04",
+ "loss": 0.592,
+ "slid_loss": 0.6006,
+ "step": 4067,
+ "time": 13.88
+ },
+ {
+ "epoch": 3.91,
+ "learning_rate": "1.1385e-04",
+ "loss": 0.6536,
+ "slid_loss": 0.6021,
+ "step": 4068,
+ "time": 12.28
+ },
+ {
+ "epoch": 3.91,
+ "learning_rate": "1.1382e-04",
+ "loss": 0.5569,
+ "slid_loss": 0.6008,
+ "step": 4069,
+ "time": 12.31
+ },
+ {
+ "epoch": 3.91,
+ "learning_rate": "1.1380e-04",
+ "loss": 0.5879,
+ "slid_loss": 0.5999,
+ "step": 4070,
+ "time": 13.76
+ },
+ {
+ "epoch": 3.91,
+ "learning_rate": "1.1378e-04",
+ "loss": 0.5516,
+ "slid_loss": 0.5998,
+ "step": 4071,
+ "time": 14.0
+ },
+ {
+ "epoch": 3.91,
+ "learning_rate": "1.1376e-04",
+ "loss": 0.5705,
+ "slid_loss": 0.5989,
+ "step": 4072,
+ "time": 12.19
+ },
+ {
+ "epoch": 3.91,
+ "learning_rate": "1.1373e-04",
+ "loss": 0.5519,
+ "slid_loss": 0.599,
+ "step": 4073,
+ "time": 13.78
+ },
+ {
+ "epoch": 3.91,
+ "learning_rate": "1.1371e-04",
+ "loss": 0.6779,
+ "slid_loss": 0.5998,
+ "step": 4074,
+ "time": 14.07
+ },
+ {
+ "epoch": 3.91,
+ "learning_rate": "1.1369e-04",
+ "loss": 0.5639,
+ "slid_loss": 0.5985,
+ "step": 4075,
+ "time": 12.84
+ },
+ {
+ "epoch": 3.92,
+ "learning_rate": "1.1366e-04",
+ "loss": 0.628,
+ "slid_loss": 0.5991,
+ "step": 4076,
+ "time": 12.76
+ },
+ {
+ "epoch": 3.92,
+ "learning_rate": "1.1364e-04",
+ "loss": 0.535,
+ "slid_loss": 0.5985,
+ "step": 4077,
+ "time": 11.41
+ },
+ {
+ "epoch": 3.92,
+ "learning_rate": "1.1362e-04",
+ "loss": 0.5914,
+ "slid_loss": 0.5981,
+ "step": 4078,
+ "time": 12.02
+ },
+ {
+ "epoch": 3.92,
+ "learning_rate": "1.1359e-04",
+ "loss": 0.6155,
+ "slid_loss": 0.5979,
+ "step": 4079,
+ "time": 12.76
+ },
+ {
+ "epoch": 3.92,
+ "learning_rate": "1.1357e-04",
+ "loss": 0.5951,
+ "slid_loss": 0.5974,
+ "step": 4080,
+ "time": 12.0
+ },
+ {
+ "epoch": 3.92,
+ "learning_rate": "1.1355e-04",
+ "loss": 0.6527,
+ "slid_loss": 0.598,
+ "step": 4081,
+ "time": 13.89
+ },
+ {
+ "epoch": 3.92,
+ "learning_rate": "1.1352e-04",
+ "loss": 0.6046,
+ "slid_loss": 0.5983,
+ "step": 4082,
+ "time": 13.33
+ },
+ {
+ "epoch": 3.92,
+ "learning_rate": "1.1350e-04",
+ "loss": 0.6154,
+ "slid_loss": 0.598,
+ "step": 4083,
+ "time": 13.41
+ },
+ {
+ "epoch": 3.92,
+ "learning_rate": "1.1348e-04",
+ "loss": 0.6121,
+ "slid_loss": 0.5974,
+ "step": 4084,
+ "time": 13.5
+ },
+ {
+ "epoch": 3.92,
+ "learning_rate": "1.1346e-04",
+ "loss": 0.6485,
+ "slid_loss": 0.5982,
+ "step": 4085,
+ "time": 11.67
+ },
+ {
+ "epoch": 3.93,
+ "learning_rate": "1.1343e-04",
+ "loss": 0.7311,
+ "slid_loss": 0.5991,
+ "step": 4086,
+ "time": 13.22
+ },
+ {
+ "epoch": 3.93,
+ "learning_rate": "1.1341e-04",
+ "loss": 0.5099,
+ "slid_loss": 0.598,
+ "step": 4087,
+ "time": 13.43
+ },
+ {
+ "epoch": 3.93,
+ "learning_rate": "1.1339e-04",
+ "loss": 0.6362,
+ "slid_loss": 0.5984,
+ "step": 4088,
+ "time": 13.43
+ },
+ {
+ "epoch": 3.93,
+ "learning_rate": "1.1336e-04",
+ "loss": 0.5041,
+ "slid_loss": 0.598,
+ "step": 4089,
+ "time": 12.98
+ },
+ {
+ "epoch": 3.93,
+ "learning_rate": "1.1334e-04",
+ "loss": 0.5043,
+ "slid_loss": 0.5976,
+ "step": 4090,
+ "time": 13.16
+ },
+ {
+ "epoch": 3.93,
+ "learning_rate": "1.1332e-04",
+ "loss": 0.573,
+ "slid_loss": 0.5968,
+ "step": 4091,
+ "time": 11.11
+ },
+ {
+ "epoch": 3.93,
+ "learning_rate": "1.1330e-04",
+ "loss": 0.6261,
+ "slid_loss": 0.5971,
+ "step": 4092,
+ "time": 13.3
+ },
+ {
+ "epoch": 3.93,
+ "learning_rate": "1.1327e-04",
+ "loss": 0.6232,
+ "slid_loss": 0.598,
+ "step": 4093,
+ "time": 13.0
+ },
+ {
+ "epoch": 3.93,
+ "learning_rate": "1.1325e-04",
+ "loss": 0.558,
+ "slid_loss": 0.5973,
+ "step": 4094,
+ "time": 13.83
+ },
+ {
+ "epoch": 3.93,
+ "learning_rate": "1.1323e-04",
+ "loss": 0.6123,
+ "slid_loss": 0.5971,
+ "step": 4095,
+ "time": 12.11
+ },
+ {
+ "epoch": 3.93,
+ "learning_rate": "1.1321e-04",
+ "loss": 0.6046,
+ "slid_loss": 0.5968,
+ "step": 4096,
+ "time": 13.82
+ },
+ {
+ "epoch": 3.94,
+ "learning_rate": "1.1318e-04",
+ "loss": 0.6053,
+ "slid_loss": 0.5973,
+ "step": 4097,
+ "time": 12.94
+ },
+ {
+ "epoch": 3.94,
+ "learning_rate": "1.1316e-04",
+ "loss": 0.6471,
+ "slid_loss": 0.5982,
+ "step": 4098,
+ "time": 12.94
+ },
+ {
+ "epoch": 3.94,
+ "learning_rate": "1.1314e-04",
+ "loss": 0.7122,
+ "slid_loss": 0.5998,
+ "step": 4099,
+ "time": 13.16
+ },
+ {
+ "epoch": 3.94,
+ "learning_rate": "1.1311e-04",
+ "loss": 0.5361,
+ "slid_loss": 0.5992,
+ "step": 4100,
+ "time": 13.48
+ },
+ {
+ "epoch": 3.94,
+ "learning_rate": "1.1309e-04",
+ "loss": 0.5857,
+ "slid_loss": 0.5994,
+ "step": 4101,
+ "time": 13.69
+ },
+ {
+ "epoch": 3.94,
+ "learning_rate": "1.1307e-04",
+ "loss": 0.5808,
+ "slid_loss": 0.5983,
+ "step": 4102,
+ "time": 13.3
+ },
+ {
+ "epoch": 3.94,
+ "learning_rate": "1.1305e-04",
+ "loss": 0.5174,
+ "slid_loss": 0.5977,
+ "step": 4103,
+ "time": 13.38
+ },
+ {
+ "epoch": 3.94,
+ "learning_rate": "1.1302e-04",
+ "loss": 0.5903,
+ "slid_loss": 0.5981,
+ "step": 4104,
+ "time": 12.98
+ },
+ {
+ "epoch": 3.94,
+ "learning_rate": "1.1300e-04",
+ "loss": 0.7024,
+ "slid_loss": 0.5996,
+ "step": 4105,
+ "time": 14.31
+ },
+ {
+ "epoch": 3.94,
+ "learning_rate": "1.1298e-04",
+ "loss": 0.5825,
+ "slid_loss": 0.5997,
+ "step": 4106,
+ "time": 12.37
+ },
+ {
+ "epoch": 3.95,
+ "learning_rate": "1.1296e-04",
+ "loss": 0.667,
+ "slid_loss": 0.6015,
+ "step": 4107,
+ "time": 12.83
+ },
+ {
+ "epoch": 3.95,
+ "learning_rate": "1.1293e-04",
+ "loss": 0.5489,
+ "slid_loss": 0.5999,
+ "step": 4108,
+ "time": 11.89
+ },
+ {
+ "epoch": 3.95,
+ "learning_rate": "1.1291e-04",
+ "loss": 0.5863,
+ "slid_loss": 0.5996,
+ "step": 4109,
+ "time": 14.33
+ },
+ {
+ "epoch": 3.95,
+ "learning_rate": "1.1289e-04",
+ "loss": 0.5454,
+ "slid_loss": 0.5993,
+ "step": 4110,
+ "time": 11.3
+ },
+ {
+ "epoch": 3.95,
+ "learning_rate": "1.1287e-04",
+ "loss": 0.5276,
+ "slid_loss": 0.5985,
+ "step": 4111,
+ "time": 14.07
+ },
+ {
+ "epoch": 3.95,
+ "learning_rate": "1.1284e-04",
+ "loss": 0.577,
+ "slid_loss": 0.598,
+ "step": 4112,
+ "time": 10.8
+ },
+ {
+ "epoch": 3.95,
+ "learning_rate": "1.1282e-04",
+ "loss": 0.6569,
+ "slid_loss": 0.5983,
+ "step": 4113,
+ "time": 13.49
+ },
+ {
+ "epoch": 3.95,
+ "learning_rate": "1.1280e-04",
+ "loss": 0.6451,
+ "slid_loss": 0.5987,
+ "step": 4114,
+ "time": 12.76
+ },
+ {
+ "epoch": 3.95,
+ "learning_rate": "1.1278e-04",
+ "loss": 0.6669,
+ "slid_loss": 0.5988,
+ "step": 4115,
+ "time": 11.56
+ },
+ {
+ "epoch": 3.95,
+ "learning_rate": "1.1275e-04",
+ "loss": 0.6029,
+ "slid_loss": 0.5987,
+ "step": 4116,
+ "time": 13.95
+ },
+ {
+ "epoch": 3.95,
+ "learning_rate": "1.1273e-04",
+ "loss": 0.5764,
+ "slid_loss": 0.599,
+ "step": 4117,
+ "time": 12.62
+ },
+ {
+ "epoch": 3.96,
+ "learning_rate": "1.1271e-04",
+ "loss": 0.6039,
+ "slid_loss": 0.5993,
+ "step": 4118,
+ "time": 12.97
+ },
+ {
+ "epoch": 3.96,
+ "learning_rate": "1.1269e-04",
+ "loss": 0.5623,
+ "slid_loss": 0.599,
+ "step": 4119,
+ "time": 12.05
+ },
+ {
+ "epoch": 3.96,
+ "learning_rate": "1.1267e-04",
+ "loss": 0.5828,
+ "slid_loss": 0.5991,
+ "step": 4120,
+ "time": 13.96
+ },
+ {
+ "epoch": 3.96,
+ "learning_rate": "1.1264e-04",
+ "loss": 0.6189,
+ "slid_loss": 0.6,
+ "step": 4121,
+ "time": 13.92
+ },
+ {
+ "epoch": 3.96,
+ "learning_rate": "1.1262e-04",
+ "loss": 0.5815,
+ "slid_loss": 0.5999,
+ "step": 4122,
+ "time": 13.34
+ },
+ {
+ "epoch": 3.96,
+ "learning_rate": "1.1260e-04",
+ "loss": 0.6288,
+ "slid_loss": 0.5999,
+ "step": 4123,
+ "time": 12.48
+ },
+ {
+ "epoch": 3.96,
+ "learning_rate": "1.1258e-04",
+ "loss": 0.5672,
+ "slid_loss": 0.5994,
+ "step": 4124,
+ "time": 11.79
+ },
+ {
+ "epoch": 3.96,
+ "learning_rate": "1.1255e-04",
+ "loss": 0.5078,
+ "slid_loss": 0.5986,
+ "step": 4125,
+ "time": 11.44
+ },
+ {
+ "epoch": 3.96,
+ "learning_rate": "1.1253e-04",
+ "loss": 0.6021,
+ "slid_loss": 0.5989,
+ "step": 4126,
+ "time": 13.35
+ },
+ {
+ "epoch": 3.96,
+ "learning_rate": "1.1251e-04",
+ "loss": 0.6428,
+ "slid_loss": 0.5993,
+ "step": 4127,
+ "time": 12.83
+ },
+ {
+ "epoch": 3.97,
+ "learning_rate": "1.1249e-04",
+ "loss": 0.5662,
+ "slid_loss": 0.599,
+ "step": 4128,
+ "time": 13.88
+ },
+ {
+ "epoch": 3.97,
+ "learning_rate": "1.1247e-04",
+ "loss": 0.6131,
+ "slid_loss": 0.5989,
+ "step": 4129,
+ "time": 13.86
+ },
+ {
+ "epoch": 3.97,
+ "learning_rate": "1.1244e-04",
+ "loss": 0.5741,
+ "slid_loss": 0.5989,
+ "step": 4130,
+ "time": 13.22
+ },
+ {
+ "epoch": 3.97,
+ "learning_rate": "1.1242e-04",
+ "loss": 0.6084,
+ "slid_loss": 0.5986,
+ "step": 4131,
+ "time": 12.92
+ },
+ {
+ "epoch": 3.97,
+ "learning_rate": "1.1240e-04",
+ "loss": 0.5803,
+ "slid_loss": 0.5974,
+ "step": 4132,
+ "time": 12.84
+ },
+ {
+ "epoch": 3.97,
+ "learning_rate": "1.1238e-04",
+ "loss": 0.6413,
+ "slid_loss": 0.5979,
+ "step": 4133,
+ "time": 13.6
+ },
+ {
+ "epoch": 3.97,
+ "learning_rate": "1.1235e-04",
+ "loss": 0.4811,
+ "slid_loss": 0.598,
+ "step": 4134,
+ "time": 13.97
+ },
+ {
+ "epoch": 3.97,
+ "learning_rate": "1.1233e-04",
+ "loss": 0.5966,
+ "slid_loss": 0.5975,
+ "step": 4135,
+ "time": 12.97
+ },
+ {
+ "epoch": 3.97,
+ "learning_rate": "1.1231e-04",
+ "loss": 0.5623,
+ "slid_loss": 0.5964,
+ "step": 4136,
+ "time": 12.17
+ },
+ {
+ "epoch": 3.97,
+ "learning_rate": "1.1229e-04",
+ "loss": 0.5928,
+ "slid_loss": 0.5962,
+ "step": 4137,
+ "time": 11.16
+ },
+ {
+ "epoch": 3.98,
+ "learning_rate": "1.1227e-04",
+ "loss": 0.5565,
+ "slid_loss": 0.5956,
+ "step": 4138,
+ "time": 13.58
+ },
+ {
+ "epoch": 3.98,
+ "learning_rate": "1.1224e-04",
+ "loss": 0.5619,
+ "slid_loss": 0.5955,
+ "step": 4139,
+ "time": 12.8
+ },
+ {
+ "epoch": 3.98,
+ "learning_rate": "1.1222e-04",
+ "loss": 0.5439,
+ "slid_loss": 0.5946,
+ "step": 4140,
+ "time": 11.06
+ },
+ {
+ "epoch": 3.98,
+ "learning_rate": "1.1220e-04",
+ "loss": 0.6738,
+ "slid_loss": 0.5954,
+ "step": 4141,
+ "time": 14.75
+ },
+ {
+ "epoch": 3.98,
+ "learning_rate": "1.1218e-04",
+ "loss": 0.5953,
+ "slid_loss": 0.5955,
+ "step": 4142,
+ "time": 12.96
+ },
+ {
+ "epoch": 3.98,
+ "learning_rate": "1.1216e-04",
+ "loss": 0.6102,
+ "slid_loss": 0.596,
+ "step": 4143,
+ "time": 12.82
+ },
+ {
+ "epoch": 3.98,
+ "learning_rate": "1.1213e-04",
+ "loss": 0.5712,
+ "slid_loss": 0.5959,
+ "step": 4144,
+ "time": 12.95
+ },
+ {
+ "epoch": 3.98,
+ "learning_rate": "1.1211e-04",
+ "loss": 0.5446,
+ "slid_loss": 0.5956,
+ "step": 4145,
+ "time": 12.33
+ },
+ {
+ "epoch": 3.98,
+ "learning_rate": "1.1209e-04",
+ "loss": 0.5269,
+ "slid_loss": 0.5939,
+ "step": 4146,
+ "time": 13.82
+ },
+ {
+ "epoch": 3.98,
+ "learning_rate": "1.1207e-04",
+ "loss": 0.5192,
+ "slid_loss": 0.5933,
+ "step": 4147,
+ "time": 13.58
+ },
+ {
+ "epoch": 3.98,
+ "learning_rate": "1.1205e-04",
+ "loss": 0.5593,
+ "slid_loss": 0.5925,
+ "step": 4148,
+ "time": 12.82
+ },
+ {
+ "epoch": 3.99,
+ "learning_rate": "1.1203e-04",
+ "loss": 0.5058,
+ "slid_loss": 0.5915,
+ "step": 4149,
+ "time": 14.27
+ },
+ {
+ "epoch": 3.99,
+ "learning_rate": "1.1200e-04",
+ "loss": 0.5501,
+ "slid_loss": 0.5914,
+ "step": 4150,
+ "time": 11.06
+ },
+ {
+ "epoch": 3.99,
+ "learning_rate": "1.1198e-04",
+ "loss": 0.5322,
+ "slid_loss": 0.5902,
+ "step": 4151,
+ "time": 13.25
+ },
+ {
+ "epoch": 3.99,
+ "learning_rate": "1.1196e-04",
+ "loss": 0.56,
+ "slid_loss": 0.5904,
+ "step": 4152,
+ "time": 11.84
+ },
+ {
+ "epoch": 3.99,
+ "learning_rate": "1.1194e-04",
+ "loss": 0.5916,
+ "slid_loss": 0.59,
+ "step": 4153,
+ "time": 13.29
+ },
+ {
+ "epoch": 3.99,
+ "learning_rate": "1.1192e-04",
+ "loss": 0.5878,
+ "slid_loss": 0.5906,
+ "step": 4154,
+ "time": 13.22
+ },
+ {
+ "epoch": 3.99,
+ "learning_rate": "1.1189e-04",
+ "loss": 0.6276,
+ "slid_loss": 0.5905,
+ "step": 4155,
+ "time": 13.21
+ },
+ {
+ "epoch": 3.99,
+ "learning_rate": "1.1187e-04",
+ "loss": 0.6033,
+ "slid_loss": 0.5916,
+ "step": 4156,
+ "time": 11.38
+ },
+ {
+ "epoch": 3.99,
+ "learning_rate": "1.1185e-04",
+ "loss": 0.7522,
+ "slid_loss": 0.5924,
+ "step": 4157,
+ "time": 12.83
+ },
+ {
+ "epoch": 3.99,
+ "learning_rate": "1.1183e-04",
+ "loss": 0.6984,
+ "slid_loss": 0.5932,
+ "step": 4158,
+ "time": 12.86
+ },
+ {
+ "epoch": 4.0,
+ "learning_rate": "1.1181e-04",
+ "loss": 0.5446,
+ "slid_loss": 0.5931,
+ "step": 4159,
+ "time": 13.35
+ },
+ {
+ "epoch": 4.0,
+ "learning_rate": "1.1179e-04",
+ "loss": 0.4993,
+ "slid_loss": 0.5919,
+ "step": 4160,
+ "time": 11.56
+ },
+ {
+ "epoch": 4.0,
+ "learning_rate": "1.1176e-04",
+ "loss": 0.545,
+ "slid_loss": 0.5917,
+ "step": 4161,
+ "time": 13.23
+ },
+ {
+ "epoch": 4.0,
+ "learning_rate": "1.1174e-04",
+ "loss": 0.6502,
+ "slid_loss": 0.5918,
+ "step": 4162,
+ "time": 13.54
+ },
+ {
+ "epoch": 4.0,
+ "learning_rate": "1.1172e-04",
+ "loss": 0.5561,
+ "slid_loss": 0.592,
+ "step": 4163,
+ "time": 12.29
+ },
+ {
+ "epoch": 4.0,
+ "learning_rate": "1.1170e-04",
+ "loss": 0.603,
+ "slid_loss": 0.5916,
+ "step": 4164,
+ "time": 13.89
+ },
+ {
+ "epoch": 4.0,
+ "learning_rate": "1.1168e-04",
+ "loss": 0.5518,
+ "slid_loss": 0.5909,
+ "step": 4165,
+ "time": 166.37
+ },
+ {
+ "epoch": 4.0,
+ "learning_rate": "1.1166e-04",
+ "loss": 0.5478,
+ "slid_loss": 0.5903,
+ "step": 4166,
+ "time": 13.31
+ },
+ {
+ "epoch": 4.0,
+ "learning_rate": "1.1164e-04",
+ "loss": 0.6197,
+ "slid_loss": 0.5906,
+ "step": 4167,
+ "time": 13.03
+ },
+ {
+ "epoch": 4.0,
+ "learning_rate": "1.1161e-04",
+ "loss": 0.6394,
+ "slid_loss": 0.5905,
+ "step": 4168,
+ "time": 12.87
+ },
+ {
+ "epoch": 4.0,
+ "learning_rate": "1.1159e-04",
+ "loss": 0.6264,
+ "slid_loss": 0.5911,
+ "step": 4169,
+ "time": 13.5
+ },
+ {
+ "epoch": 4.01,
+ "learning_rate": "1.1157e-04",
+ "loss": 0.5409,
+ "slid_loss": 0.5907,
+ "step": 4170,
+ "time": 13.29
+ },
+ {
+ "epoch": 4.01,
+ "learning_rate": "1.1155e-04",
+ "loss": 0.6208,
+ "slid_loss": 0.5914,
+ "step": 4171,
+ "time": 12.31
+ },
+ {
+ "epoch": 4.01,
+ "learning_rate": "1.1153e-04",
+ "loss": 0.6154,
+ "slid_loss": 0.5918,
+ "step": 4172,
+ "time": 13.68
+ },
+ {
+ "epoch": 4.01,
+ "learning_rate": "1.1151e-04",
+ "loss": 0.5871,
+ "slid_loss": 0.5922,
+ "step": 4173,
+ "time": 13.25
+ },
+ {
+ "epoch": 4.01,
+ "learning_rate": "1.1149e-04",
+ "loss": 0.5877,
+ "slid_loss": 0.5913,
+ "step": 4174,
+ "time": 14.14
+ },
+ {
+ "epoch": 4.01,
+ "learning_rate": "1.1146e-04",
+ "loss": 0.5997,
+ "slid_loss": 0.5916,
+ "step": 4175,
+ "time": 13.98
+ },
+ {
+ "epoch": 4.01,
+ "learning_rate": "1.1144e-04",
+ "loss": 0.6019,
+ "slid_loss": 0.5914,
+ "step": 4176,
+ "time": 13.38
+ },
+ {
+ "epoch": 4.01,
+ "learning_rate": "1.1142e-04",
+ "loss": 0.572,
+ "slid_loss": 0.5917,
+ "step": 4177,
+ "time": 14.36
+ },
+ {
+ "epoch": 4.01,
+ "learning_rate": "1.1140e-04",
+ "loss": 0.5763,
+ "slid_loss": 0.5916,
+ "step": 4178,
+ "time": 13.37
+ },
+ {
+ "epoch": 4.01,
+ "learning_rate": "1.1138e-04",
+ "loss": 0.5558,
+ "slid_loss": 0.591,
+ "step": 4179,
+ "time": 11.18
+ },
+ {
+ "epoch": 4.02,
+ "learning_rate": "1.1136e-04",
+ "loss": 0.5822,
+ "slid_loss": 0.5909,
+ "step": 4180,
+ "time": 13.22
+ },
+ {
+ "epoch": 4.02,
+ "learning_rate": "1.1134e-04",
+ "loss": 0.5603,
+ "slid_loss": 0.5899,
+ "step": 4181,
+ "time": 11.43
+ },
+ {
+ "epoch": 4.02,
+ "learning_rate": "1.1131e-04",
+ "loss": 0.5613,
+ "slid_loss": 0.5895,
+ "step": 4182,
+ "time": 10.76
+ },
+ {
+ "epoch": 4.02,
+ "learning_rate": "1.1129e-04",
+ "loss": 0.5206,
+ "slid_loss": 0.5886,
+ "step": 4183,
+ "time": 11.39
+ },
+ {
+ "epoch": 4.02,
+ "learning_rate": "1.1127e-04",
+ "loss": 0.579,
+ "slid_loss": 0.5882,
+ "step": 4184,
+ "time": 10.63
+ },
+ {
+ "epoch": 4.02,
+ "learning_rate": "1.1125e-04",
+ "loss": 0.6052,
+ "slid_loss": 0.5878,
+ "step": 4185,
+ "time": 13.76
+ },
+ {
+ "epoch": 4.02,
+ "learning_rate": "1.1123e-04",
+ "loss": 0.4844,
+ "slid_loss": 0.5853,
+ "step": 4186,
+ "time": 13.41
+ },
+ {
+ "epoch": 4.02,
+ "learning_rate": "1.1121e-04",
+ "loss": 0.5044,
+ "slid_loss": 0.5853,
+ "step": 4187,
+ "time": 13.6
+ },
+ {
+ "epoch": 4.02,
+ "learning_rate": "1.1119e-04",
+ "loss": 0.5329,
+ "slid_loss": 0.5842,
+ "step": 4188,
+ "time": 13.16
+ },
+ {
+ "epoch": 4.02,
+ "learning_rate": "1.1117e-04",
+ "loss": 0.6431,
+ "slid_loss": 0.5856,
+ "step": 4189,
+ "time": 13.4
+ },
+ {
+ "epoch": 4.02,
+ "learning_rate": "1.1115e-04",
+ "loss": 0.5438,
+ "slid_loss": 0.586,
+ "step": 4190,
+ "time": 11.22
+ },
+ {
+ "epoch": 4.03,
+ "learning_rate": "1.1112e-04",
+ "loss": 0.5815,
+ "slid_loss": 0.5861,
+ "step": 4191,
+ "time": 11.75
+ },
+ {
+ "epoch": 4.03,
+ "learning_rate": "1.1110e-04",
+ "loss": 0.7033,
+ "slid_loss": 0.5869,
+ "step": 4192,
+ "time": 12.86
+ },
+ {
+ "epoch": 4.03,
+ "learning_rate": "1.1108e-04",
+ "loss": 0.6499,
+ "slid_loss": 0.5871,
+ "step": 4193,
+ "time": 13.45
+ },
+ {
+ "epoch": 4.03,
+ "learning_rate": "1.1106e-04",
+ "loss": 0.5641,
+ "slid_loss": 0.5872,
+ "step": 4194,
+ "time": 12.84
+ },
+ {
+ "epoch": 4.03,
+ "learning_rate": "1.1104e-04",
+ "loss": 0.5623,
+ "slid_loss": 0.5867,
+ "step": 4195,
+ "time": 12.8
+ },
+ {
+ "epoch": 4.03,
+ "learning_rate": "1.1102e-04",
+ "loss": 0.5654,
+ "slid_loss": 0.5863,
+ "step": 4196,
+ "time": 13.44
+ },
+ {
+ "epoch": 4.03,
+ "learning_rate": "1.1100e-04",
+ "loss": 0.5869,
+ "slid_loss": 0.5861,
+ "step": 4197,
+ "time": 13.62
+ },
+ {
+ "epoch": 4.03,
+ "learning_rate": "1.1098e-04",
+ "loss": 0.56,
+ "slid_loss": 0.5853,
+ "step": 4198,
+ "time": 13.67
+ },
+ {
+ "epoch": 4.03,
+ "learning_rate": "1.1096e-04",
+ "loss": 0.5671,
+ "slid_loss": 0.5838,
+ "step": 4199,
+ "time": 12.09
+ },
+ {
+ "epoch": 4.03,
+ "learning_rate": "1.1094e-04",
+ "loss": 0.5913,
+ "slid_loss": 0.5844,
+ "step": 4200,
+ "time": 13.26
+ },
+ {
+ "epoch": 4.04,
+ "learning_rate": "1.1091e-04",
+ "loss": 0.5586,
+ "slid_loss": 0.5841,
+ "step": 4201,
+ "time": 13.33
+ },
+ {
+ "epoch": 4.04,
+ "learning_rate": "1.1089e-04",
+ "loss": 0.5594,
+ "slid_loss": 0.5839,
+ "step": 4202,
+ "time": 12.82
+ },
+ {
+ "epoch": 4.04,
+ "learning_rate": "1.1087e-04",
+ "loss": 0.7161,
+ "slid_loss": 0.5859,
+ "step": 4203,
+ "time": 12.74
+ },
+ {
+ "epoch": 4.04,
+ "learning_rate": "1.1085e-04",
+ "loss": 0.5381,
+ "slid_loss": 0.5853,
+ "step": 4204,
+ "time": 13.36
+ },
+ {
+ "epoch": 4.04,
+ "learning_rate": "1.1083e-04",
+ "loss": 0.4979,
+ "slid_loss": 0.5833,
+ "step": 4205,
+ "time": 13.29
+ },
+ {
+ "epoch": 4.04,
+ "learning_rate": "1.1081e-04",
+ "loss": 0.6594,
+ "slid_loss": 0.5841,
+ "step": 4206,
+ "time": 11.31
+ },
+ {
+ "epoch": 4.04,
+ "learning_rate": "1.1079e-04",
+ "loss": 0.6511,
+ "slid_loss": 0.5839,
+ "step": 4207,
+ "time": 13.18
+ },
+ {
+ "epoch": 4.04,
+ "learning_rate": "1.1077e-04",
+ "loss": 0.5611,
+ "slid_loss": 0.584,
+ "step": 4208,
+ "time": 12.94
+ },
+ {
+ "epoch": 4.04,
+ "learning_rate": "1.1075e-04",
+ "loss": 0.5947,
+ "slid_loss": 0.5841,
+ "step": 4209,
+ "time": 13.31
+ },
+ {
+ "epoch": 4.04,
+ "learning_rate": "1.1073e-04",
+ "loss": 0.6093,
+ "slid_loss": 0.5847,
+ "step": 4210,
+ "time": 11.37
+ },
+ {
+ "epoch": 4.05,
+ "learning_rate": "1.1071e-04",
+ "loss": 0.6287,
+ "slid_loss": 0.5858,
+ "step": 4211,
+ "time": 12.92
+ },
+ {
+ "epoch": 4.05,
+ "learning_rate": "1.1069e-04",
+ "loss": 0.4982,
+ "slid_loss": 0.585,
+ "step": 4212,
+ "time": 11.82
+ },
+ {
+ "epoch": 4.05,
+ "learning_rate": "1.1066e-04",
+ "loss": 0.5714,
+ "slid_loss": 0.5841,
+ "step": 4213,
+ "time": 12.31
+ },
+ {
+ "epoch": 4.05,
+ "learning_rate": "1.1064e-04",
+ "loss": 0.5676,
+ "slid_loss": 0.5833,
+ "step": 4214,
+ "time": 14.13
+ },
+ {
+ "epoch": 4.05,
+ "learning_rate": "1.1062e-04",
+ "loss": 0.6607,
+ "slid_loss": 0.5833,
+ "step": 4215,
+ "time": 13.57
+ },
+ {
+ "epoch": 4.05,
+ "learning_rate": "1.1060e-04",
+ "loss": 0.6135,
+ "slid_loss": 0.5834,
+ "step": 4216,
+ "time": 13.0
+ },
+ {
+ "epoch": 4.05,
+ "learning_rate": "1.1058e-04",
+ "loss": 0.558,
+ "slid_loss": 0.5832,
+ "step": 4217,
+ "time": 11.59
+ },
+ {
+ "epoch": 4.05,
+ "learning_rate": "1.1056e-04",
+ "loss": 0.6425,
+ "slid_loss": 0.5836,
+ "step": 4218,
+ "time": 13.24
+ },
+ {
+ "epoch": 4.05,
+ "learning_rate": "1.1054e-04",
+ "loss": 0.635,
+ "slid_loss": 0.5843,
+ "step": 4219,
+ "time": 13.17
+ },
+ {
+ "epoch": 4.05,
+ "learning_rate": "1.1052e-04",
+ "loss": 0.5779,
+ "slid_loss": 0.5843,
+ "step": 4220,
+ "time": 12.64
+ },
+ {
+ "epoch": 4.05,
+ "learning_rate": "1.1050e-04",
+ "loss": 0.6459,
+ "slid_loss": 0.5845,
+ "step": 4221,
+ "time": 12.67
+ },
+ {
+ "epoch": 4.06,
+ "learning_rate": "1.1048e-04",
+ "loss": 0.5024,
+ "slid_loss": 0.5837,
+ "step": 4222,
+ "time": 13.71
+ },
+ {
+ "epoch": 4.06,
+ "learning_rate": "1.1046e-04",
+ "loss": 0.6053,
+ "slid_loss": 0.5835,
+ "step": 4223,
+ "time": 13.47
+ },
+ {
+ "epoch": 4.06,
+ "learning_rate": "1.1044e-04",
+ "loss": 0.5921,
+ "slid_loss": 0.5838,
+ "step": 4224,
+ "time": 12.83
+ },
+ {
+ "epoch": 4.06,
+ "learning_rate": "1.1042e-04",
+ "loss": 0.577,
+ "slid_loss": 0.5844,
+ "step": 4225,
+ "time": 13.0
+ },
+ {
+ "epoch": 4.06,
+ "learning_rate": "1.1040e-04",
+ "loss": 0.5671,
+ "slid_loss": 0.5841,
+ "step": 4226,
+ "time": 12.25
+ },
+ {
+ "epoch": 4.06,
+ "learning_rate": "1.1038e-04",
+ "loss": 0.7054,
+ "slid_loss": 0.5847,
+ "step": 4227,
+ "time": 12.8
+ },
+ {
+ "epoch": 4.06,
+ "learning_rate": "1.1036e-04",
+ "loss": 0.5666,
+ "slid_loss": 0.5847,
+ "step": 4228,
+ "time": 13.53
+ },
+ {
+ "epoch": 4.06,
+ "learning_rate": "1.1034e-04",
+ "loss": 0.5965,
+ "slid_loss": 0.5846,
+ "step": 4229,
+ "time": 13.51
+ },
+ {
+ "epoch": 4.06,
+ "learning_rate": "1.1032e-04",
+ "loss": 0.5912,
+ "slid_loss": 0.5847,
+ "step": 4230,
+ "time": 11.79
+ },
+ {
+ "epoch": 4.06,
+ "learning_rate": "1.1029e-04",
+ "loss": 0.5425,
+ "slid_loss": 0.5841,
+ "step": 4231,
+ "time": 14.0
+ },
+ {
+ "epoch": 4.07,
+ "learning_rate": "1.1027e-04",
+ "loss": 0.5704,
+ "slid_loss": 0.584,
+ "step": 4232,
+ "time": 13.33
+ },
+ {
+ "epoch": 4.07,
+ "learning_rate": "1.1025e-04",
+ "loss": 0.5127,
+ "slid_loss": 0.5827,
+ "step": 4233,
+ "time": 12.89
+ },
+ {
+ "epoch": 4.07,
+ "learning_rate": "1.1023e-04",
+ "loss": 0.5477,
+ "slid_loss": 0.5834,
+ "step": 4234,
+ "time": 12.31
+ },
+ {
+ "epoch": 4.07,
+ "learning_rate": "1.1021e-04",
+ "loss": 0.5216,
+ "slid_loss": 0.5826,
+ "step": 4235,
+ "time": 13.17
+ },
+ {
+ "epoch": 4.07,
+ "learning_rate": "1.1019e-04",
+ "loss": 0.6391,
+ "slid_loss": 0.5834,
+ "step": 4236,
+ "time": 13.63
+ },
+ {
+ "epoch": 4.07,
+ "learning_rate": "1.1017e-04",
+ "loss": 0.5674,
+ "slid_loss": 0.5831,
+ "step": 4237,
+ "time": 13.37
+ },
+ {
+ "epoch": 4.07,
+ "learning_rate": "1.1015e-04",
+ "loss": 0.5978,
+ "slid_loss": 0.5835,
+ "step": 4238,
+ "time": 13.64
+ },
+ {
+ "epoch": 4.07,
+ "learning_rate": "1.1013e-04",
+ "loss": 0.4992,
+ "slid_loss": 0.5829,
+ "step": 4239,
+ "time": 13.3
+ },
+ {
+ "epoch": 4.07,
+ "learning_rate": "1.1011e-04",
+ "loss": 0.5865,
+ "slid_loss": 0.5833,
+ "step": 4240,
+ "time": 12.76
+ },
+ {
+ "epoch": 4.07,
+ "learning_rate": "1.1009e-04",
+ "loss": 0.5741,
+ "slid_loss": 0.5823,
+ "step": 4241,
+ "time": 13.11
+ },
+ {
+ "epoch": 4.07,
+ "learning_rate": "1.1007e-04",
+ "loss": 0.5772,
+ "slid_loss": 0.5822,
+ "step": 4242,
+ "time": 13.45
+ },
+ {
+ "epoch": 4.08,
+ "learning_rate": "1.1005e-04",
+ "loss": 0.5783,
+ "slid_loss": 0.5818,
+ "step": 4243,
+ "time": 12.16
+ },
+ {
+ "epoch": 4.08,
+ "learning_rate": "1.1003e-04",
+ "loss": 0.5315,
+ "slid_loss": 0.5814,
+ "step": 4244,
+ "time": 13.41
+ },
+ {
+ "epoch": 4.08,
+ "learning_rate": "1.1001e-04",
+ "loss": 0.5487,
+ "slid_loss": 0.5815,
+ "step": 4245,
+ "time": 13.33
+ },
+ {
+ "epoch": 4.08,
+ "learning_rate": "1.0999e-04",
+ "loss": 0.5647,
+ "slid_loss": 0.5819,
+ "step": 4246,
+ "time": 12.77
+ },
+ {
+ "epoch": 4.08,
+ "learning_rate": "1.0997e-04",
+ "loss": 0.6098,
+ "slid_loss": 0.5828,
+ "step": 4247,
+ "time": 13.82
+ },
+ {
+ "epoch": 4.08,
+ "learning_rate": "1.0995e-04",
+ "loss": 0.5029,
+ "slid_loss": 0.5822,
+ "step": 4248,
+ "time": 13.6
+ },
+ {
+ "epoch": 4.08,
+ "learning_rate": "1.0993e-04",
+ "loss": 0.4987,
+ "slid_loss": 0.5821,
+ "step": 4249,
+ "time": 13.1
+ },
+ {
+ "epoch": 4.08,
+ "learning_rate": "1.0991e-04",
+ "loss": 0.5329,
+ "slid_loss": 0.582,
+ "step": 4250,
+ "time": 13.71
+ },
+ {
+ "epoch": 4.08,
+ "learning_rate": "1.0989e-04",
+ "loss": 0.6148,
+ "slid_loss": 0.5828,
+ "step": 4251,
+ "time": 11.78
+ },
+ {
+ "epoch": 4.08,
+ "learning_rate": "1.0987e-04",
+ "loss": 0.5762,
+ "slid_loss": 0.5829,
+ "step": 4252,
+ "time": 12.88
+ },
+ {
+ "epoch": 4.09,
+ "learning_rate": "1.0985e-04",
+ "loss": 0.6973,
+ "slid_loss": 0.584,
+ "step": 4253,
+ "time": 12.18
+ },
+ {
+ "epoch": 4.09,
+ "learning_rate": "1.0983e-04",
+ "loss": 0.5932,
+ "slid_loss": 0.5841,
+ "step": 4254,
+ "time": 12.92
+ },
+ {
+ "epoch": 4.09,
+ "learning_rate": "1.0981e-04",
+ "loss": 0.5841,
+ "slid_loss": 0.5836,
+ "step": 4255,
+ "time": 11.55
+ },
+ {
+ "epoch": 4.09,
+ "learning_rate": "1.0979e-04",
+ "loss": 0.5875,
+ "slid_loss": 0.5835,
+ "step": 4256,
+ "time": 14.34
+ },
+ {
+ "epoch": 4.09,
+ "learning_rate": "1.0977e-04",
+ "loss": 0.5127,
+ "slid_loss": 0.5811,
+ "step": 4257,
+ "time": 12.13
+ },
+ {
+ "epoch": 4.09,
+ "learning_rate": "1.0975e-04",
+ "loss": 0.6067,
+ "slid_loss": 0.5801,
+ "step": 4258,
+ "time": 12.19
+ },
+ {
+ "epoch": 4.09,
+ "learning_rate": "1.0973e-04",
+ "loss": 0.6113,
+ "slid_loss": 0.5808,
+ "step": 4259,
+ "time": 12.92
+ },
+ {
+ "epoch": 4.09,
+ "learning_rate": "1.0971e-04",
+ "loss": 0.5244,
+ "slid_loss": 0.5811,
+ "step": 4260,
+ "time": 13.86
+ },
+ {
+ "epoch": 4.09,
+ "learning_rate": "1.0969e-04",
+ "loss": 0.5904,
+ "slid_loss": 0.5815,
+ "step": 4261,
+ "time": 12.14
+ },
+ {
+ "epoch": 4.09,
+ "learning_rate": "1.0967e-04",
+ "loss": 0.5329,
+ "slid_loss": 0.5803,
+ "step": 4262,
+ "time": 12.02
+ },
+ {
+ "epoch": 4.1,
+ "learning_rate": "1.0965e-04",
+ "loss": 0.5185,
+ "slid_loss": 0.58,
+ "step": 4263,
+ "time": 12.79
+ },
+ {
+ "epoch": 4.1,
+ "learning_rate": "1.0963e-04",
+ "loss": 0.5831,
+ "slid_loss": 0.5798,
+ "step": 4264,
+ "time": 12.68
+ },
+ {
+ "epoch": 4.1,
+ "learning_rate": "1.0961e-04",
+ "loss": 0.5788,
+ "slid_loss": 0.58,
+ "step": 4265,
+ "time": 13.25
+ },
+ {
+ "epoch": 4.1,
+ "learning_rate": "1.0959e-04",
+ "loss": 0.5965,
+ "slid_loss": 0.5805,
+ "step": 4266,
+ "time": 13.48
+ },
+ {
+ "epoch": 4.1,
+ "learning_rate": "1.0957e-04",
+ "loss": 0.5964,
+ "slid_loss": 0.5803,
+ "step": 4267,
+ "time": 13.55
+ },
+ {
+ "epoch": 4.1,
+ "learning_rate": "1.0955e-04",
+ "loss": 0.5525,
+ "slid_loss": 0.5794,
+ "step": 4268,
+ "time": 13.72
+ },
+ {
+ "epoch": 4.1,
+ "learning_rate": "1.0953e-04",
+ "loss": 0.614,
+ "slid_loss": 0.5793,
+ "step": 4269,
+ "time": 11.24
+ },
+ {
+ "epoch": 4.1,
+ "learning_rate": "1.0951e-04",
+ "loss": 0.5847,
+ "slid_loss": 0.5797,
+ "step": 4270,
+ "time": 13.64
+ },
+ {
+ "epoch": 4.1,
+ "learning_rate": "1.0949e-04",
+ "loss": 0.5628,
+ "slid_loss": 0.5792,
+ "step": 4271,
+ "time": 13.73
+ },
+ {
+ "epoch": 4.1,
+ "learning_rate": "1.0947e-04",
+ "loss": 0.577,
+ "slid_loss": 0.5788,
+ "step": 4272,
+ "time": 12.8
+ },
+ {
+ "epoch": 4.1,
+ "learning_rate": "1.0945e-04",
+ "loss": 0.5564,
+ "slid_loss": 0.5785,
+ "step": 4273,
+ "time": 13.42
+ },
+ {
+ "epoch": 4.11,
+ "learning_rate": "1.0944e-04",
+ "loss": 0.6607,
+ "slid_loss": 0.5792,
+ "step": 4274,
+ "time": 13.57
+ },
+ {
+ "epoch": 4.11,
+ "learning_rate": "1.0942e-04",
+ "loss": 0.5974,
+ "slid_loss": 0.5792,
+ "step": 4275,
+ "time": 14.31
+ },
+ {
+ "epoch": 4.11,
+ "learning_rate": "1.0940e-04",
+ "loss": 0.5566,
+ "slid_loss": 0.5787,
+ "step": 4276,
+ "time": 12.58
+ },
+ {
+ "epoch": 4.11,
+ "learning_rate": "1.0938e-04",
+ "loss": 0.6618,
+ "slid_loss": 0.5796,
+ "step": 4277,
+ "time": 13.56
+ },
+ {
+ "epoch": 4.11,
+ "learning_rate": "1.0936e-04",
+ "loss": 0.6143,
+ "slid_loss": 0.58,
+ "step": 4278,
+ "time": 13.87
+ },
+ {
+ "epoch": 4.11,
+ "learning_rate": "1.0934e-04",
+ "loss": 0.4348,
+ "slid_loss": 0.5788,
+ "step": 4279,
+ "time": 12.43
+ },
+ {
+ "epoch": 4.11,
+ "learning_rate": "1.0932e-04",
+ "loss": 0.5726,
+ "slid_loss": 0.5787,
+ "step": 4280,
+ "time": 13.34
+ },
+ {
+ "epoch": 4.11,
+ "learning_rate": "1.0930e-04",
+ "loss": 0.6186,
+ "slid_loss": 0.5793,
+ "step": 4281,
+ "time": 13.18
+ },
+ {
+ "epoch": 4.11,
+ "learning_rate": "1.0928e-04",
+ "loss": 0.5011,
+ "slid_loss": 0.5787,
+ "step": 4282,
+ "time": 13.27
+ },
+ {
+ "epoch": 4.11,
+ "learning_rate": "1.0926e-04",
+ "loss": 0.5969,
+ "slid_loss": 0.5794,
+ "step": 4283,
+ "time": 13.98
+ },
+ {
+ "epoch": 4.12,
+ "learning_rate": "1.0924e-04",
+ "loss": 0.487,
+ "slid_loss": 0.5785,
+ "step": 4284,
+ "time": 12.73
+ },
+ {
+ "epoch": 4.12,
+ "learning_rate": "1.0922e-04",
+ "loss": 0.6923,
+ "slid_loss": 0.5794,
+ "step": 4285,
+ "time": 13.97
+ },
+ {
+ "epoch": 4.12,
+ "learning_rate": "1.0920e-04",
+ "loss": 0.5766,
+ "slid_loss": 0.5803,
+ "step": 4286,
+ "time": 13.13
+ },
+ {
+ "epoch": 4.12,
+ "learning_rate": "1.0918e-04",
+ "loss": 0.6397,
+ "slid_loss": 0.5817,
+ "step": 4287,
+ "time": 12.09
+ },
+ {
+ "epoch": 4.12,
+ "learning_rate": "1.0916e-04",
+ "loss": 0.5767,
+ "slid_loss": 0.5821,
+ "step": 4288,
+ "time": 11.9
+ },
+ {
+ "epoch": 4.12,
+ "learning_rate": "1.0914e-04",
+ "loss": 0.5242,
+ "slid_loss": 0.5809,
+ "step": 4289,
+ "time": 13.6
+ },
+ {
+ "epoch": 4.12,
+ "learning_rate": "1.0912e-04",
+ "loss": 0.5225,
+ "slid_loss": 0.5807,
+ "step": 4290,
+ "time": 13.06
+ },
+ {
+ "epoch": 4.12,
+ "learning_rate": "1.0910e-04",
+ "loss": 0.6269,
+ "slid_loss": 0.5812,
+ "step": 4291,
+ "time": 13.24
+ },
+ {
+ "epoch": 4.12,
+ "learning_rate": "1.0909e-04",
+ "loss": 0.539,
+ "slid_loss": 0.5795,
+ "step": 4292,
+ "time": 13.74
+ },
+ {
+ "epoch": 4.12,
+ "learning_rate": "1.0907e-04",
+ "loss": 0.5867,
+ "slid_loss": 0.5789,
+ "step": 4293,
+ "time": 13.26
+ },
+ {
+ "epoch": 4.12,
+ "learning_rate": "1.0905e-04",
+ "loss": 0.6056,
+ "slid_loss": 0.5793,
+ "step": 4294,
+ "time": 13.77
+ },
+ {
+ "epoch": 4.13,
+ "learning_rate": "1.0903e-04",
+ "loss": 0.4925,
+ "slid_loss": 0.5786,
+ "step": 4295,
+ "time": 13.63
+ },
+ {
+ "epoch": 4.13,
+ "learning_rate": "1.0901e-04",
+ "loss": 0.6271,
+ "slid_loss": 0.5792,
+ "step": 4296,
+ "time": 13.14
+ },
+ {
+ "epoch": 4.13,
+ "learning_rate": "1.0899e-04",
+ "loss": 0.5587,
+ "slid_loss": 0.5789,
+ "step": 4297,
+ "time": 13.63
+ },
+ {
+ "epoch": 4.13,
+ "learning_rate": "1.0897e-04",
+ "loss": 0.5397,
+ "slid_loss": 0.5787,
+ "step": 4298,
+ "time": 10.81
+ },
+ {
+ "epoch": 4.13,
+ "learning_rate": "1.0895e-04",
+ "loss": 0.5799,
+ "slid_loss": 0.5789,
+ "step": 4299,
+ "time": 12.0
+ },
+ {
+ "epoch": 4.13,
+ "learning_rate": "1.0893e-04",
+ "loss": 0.6091,
+ "slid_loss": 0.579,
+ "step": 4300,
+ "time": 13.59
+ },
+ {
+ "epoch": 4.13,
+ "learning_rate": "1.0891e-04",
+ "loss": 0.5922,
+ "slid_loss": 0.5794,
+ "step": 4301,
+ "time": 10.99
+ },
+ {
+ "epoch": 4.13,
+ "learning_rate": "1.0889e-04",
+ "loss": 0.5908,
+ "slid_loss": 0.5797,
+ "step": 4302,
+ "time": 13.21
+ },
+ {
+ "epoch": 4.13,
+ "learning_rate": "1.0887e-04",
+ "loss": 0.5425,
+ "slid_loss": 0.5779,
+ "step": 4303,
+ "time": 13.25
+ },
+ {
+ "epoch": 4.13,
+ "learning_rate": "1.0886e-04",
+ "loss": 0.5668,
+ "slid_loss": 0.5782,
+ "step": 4304,
+ "time": 13.35
+ },
+ {
+ "epoch": 4.14,
+ "learning_rate": "1.0884e-04",
+ "loss": 0.568,
+ "slid_loss": 0.5789,
+ "step": 4305,
+ "time": 11.64
+ },
+ {
+ "epoch": 4.14,
+ "learning_rate": "1.0882e-04",
+ "loss": 0.4701,
+ "slid_loss": 0.577,
+ "step": 4306,
+ "time": 13.92
+ },
+ {
+ "epoch": 4.14,
+ "learning_rate": "1.0880e-04",
+ "loss": 0.5937,
+ "slid_loss": 0.5765,
+ "step": 4307,
+ "time": 12.89
+ },
+ {
+ "epoch": 4.14,
+ "learning_rate": "1.0878e-04",
+ "loss": 0.5998,
+ "slid_loss": 0.5769,
+ "step": 4308,
+ "time": 13.27
+ },
+ {
+ "epoch": 4.14,
+ "learning_rate": "1.0876e-04",
+ "loss": 0.5799,
+ "slid_loss": 0.5767,
+ "step": 4309,
+ "time": 13.33
+ },
+ {
+ "epoch": 4.14,
+ "learning_rate": "1.0874e-04",
+ "loss": 0.6229,
+ "slid_loss": 0.5768,
+ "step": 4310,
+ "time": 12.13
+ },
+ {
+ "epoch": 4.14,
+ "learning_rate": "1.0872e-04",
+ "loss": 0.5331,
+ "slid_loss": 0.5759,
+ "step": 4311,
+ "time": 13.35
+ },
+ {
+ "epoch": 4.14,
+ "learning_rate": "1.0870e-04",
+ "loss": 0.5623,
+ "slid_loss": 0.5765,
+ "step": 4312,
+ "time": 13.81
+ },
+ {
+ "epoch": 4.14,
+ "learning_rate": "1.0868e-04",
+ "loss": 0.6547,
+ "slid_loss": 0.5774,
+ "step": 4313,
+ "time": 13.0
+ },
+ {
+ "epoch": 4.14,
+ "learning_rate": "1.0867e-04",
+ "loss": 0.4918,
+ "slid_loss": 0.5766,
+ "step": 4314,
+ "time": 13.29
+ },
+ {
+ "epoch": 4.15,
+ "learning_rate": "1.0865e-04",
+ "loss": 0.55,
+ "slid_loss": 0.5755,
+ "step": 4315,
+ "time": 12.82
+ },
+ {
+ "epoch": 4.15,
+ "learning_rate": "1.0863e-04",
+ "loss": 0.5527,
+ "slid_loss": 0.5749,
+ "step": 4316,
+ "time": 12.77
+ },
+ {
+ "epoch": 4.15,
+ "learning_rate": "1.0861e-04",
+ "loss": 0.5673,
+ "slid_loss": 0.575,
+ "step": 4317,
+ "time": 12.3
+ },
+ {
+ "epoch": 4.15,
+ "learning_rate": "1.0859e-04",
+ "loss": 0.515,
+ "slid_loss": 0.5737,
+ "step": 4318,
+ "time": 13.21
+ },
+ {
+ "epoch": 4.15,
+ "learning_rate": "1.0857e-04",
+ "loss": 0.4943,
+ "slid_loss": 0.5723,
+ "step": 4319,
+ "time": 11.26
+ },
+ {
+ "epoch": 4.15,
+ "learning_rate": "1.0855e-04",
+ "loss": 0.5458,
+ "slid_loss": 0.572,
+ "step": 4320,
+ "time": 14.21
+ },
+ {
+ "epoch": 4.15,
+ "learning_rate": "1.0853e-04",
+ "loss": 0.559,
+ "slid_loss": 0.5711,
+ "step": 4321,
+ "time": 12.84
+ },
+ {
+ "epoch": 4.15,
+ "learning_rate": "1.0852e-04",
+ "loss": 0.5555,
+ "slid_loss": 0.5716,
+ "step": 4322,
+ "time": 12.03
+ },
+ {
+ "epoch": 4.15,
+ "learning_rate": "1.0850e-04",
+ "loss": 0.6548,
+ "slid_loss": 0.5721,
+ "step": 4323,
+ "time": 12.87
+ },
+ {
+ "epoch": 4.15,
+ "learning_rate": "1.0848e-04",
+ "loss": 0.4743,
+ "slid_loss": 0.571,
+ "step": 4324,
+ "time": 12.93
+ },
+ {
+ "epoch": 4.15,
+ "learning_rate": "1.0846e-04",
+ "loss": 0.6153,
+ "slid_loss": 0.5713,
+ "step": 4325,
+ "time": 13.48
+ },
+ {
+ "epoch": 4.16,
+ "learning_rate": "1.0844e-04",
+ "loss": 0.5441,
+ "slid_loss": 0.5711,
+ "step": 4326,
+ "time": 13.22
+ },
+ {
+ "epoch": 4.16,
+ "learning_rate": "1.0842e-04",
+ "loss": 0.5518,
+ "slid_loss": 0.5696,
+ "step": 4327,
+ "time": 13.98
+ },
+ {
+ "epoch": 4.16,
+ "learning_rate": "1.0840e-04",
+ "loss": 0.5863,
+ "slid_loss": 0.5698,
+ "step": 4328,
+ "time": 12.73
+ },
+ {
+ "epoch": 4.16,
+ "learning_rate": "1.0838e-04",
+ "loss": 0.5523,
+ "slid_loss": 0.5693,
+ "step": 4329,
+ "time": 13.67
+ },
+ {
+ "epoch": 4.16,
+ "learning_rate": "1.0837e-04",
+ "loss": 0.5799,
+ "slid_loss": 0.5692,
+ "step": 4330,
+ "time": 12.98
+ },
+ {
+ "epoch": 4.16,
+ "learning_rate": "1.0835e-04",
+ "loss": 0.564,
+ "slid_loss": 0.5694,
+ "step": 4331,
+ "time": 13.47
+ },
+ {
+ "epoch": 4.16,
+ "learning_rate": "1.0833e-04",
+ "loss": 0.652,
+ "slid_loss": 0.5702,
+ "step": 4332,
+ "time": 12.47
+ },
+ {
+ "epoch": 4.16,
+ "learning_rate": "1.0831e-04",
+ "loss": 0.6051,
+ "slid_loss": 0.5712,
+ "step": 4333,
+ "time": 13.59
+ },
+ {
+ "epoch": 4.16,
+ "learning_rate": "1.0829e-04",
+ "loss": 0.5991,
+ "slid_loss": 0.5717,
+ "step": 4334,
+ "time": 10.95
+ },
+ {
+ "epoch": 4.16,
+ "learning_rate": "1.0827e-04",
+ "loss": 0.5713,
+ "slid_loss": 0.5722,
+ "step": 4335,
+ "time": 12.79
+ },
+ {
+ "epoch": 4.17,
+ "learning_rate": "1.0825e-04",
+ "loss": 0.6023,
+ "slid_loss": 0.5718,
+ "step": 4336,
+ "time": 13.93
+ },
+ {
+ "epoch": 4.17,
+ "learning_rate": "1.0824e-04",
+ "loss": 0.6313,
+ "slid_loss": 0.5725,
+ "step": 4337,
+ "time": 13.2
+ },
+ {
+ "epoch": 4.17,
+ "learning_rate": "1.0822e-04",
+ "loss": 0.5566,
+ "slid_loss": 0.572,
+ "step": 4338,
+ "time": 11.56
+ },
+ {
+ "epoch": 4.17,
+ "learning_rate": "1.0820e-04",
+ "loss": 0.536,
+ "slid_loss": 0.5724,
+ "step": 4339,
+ "time": 13.89
+ },
+ {
+ "epoch": 4.17,
+ "learning_rate": "1.0818e-04",
+ "loss": 0.6758,
+ "slid_loss": 0.5733,
+ "step": 4340,
+ "time": 12.92
+ },
+ {
+ "epoch": 4.17,
+ "learning_rate": "1.0816e-04",
+ "loss": 0.58,
+ "slid_loss": 0.5734,
+ "step": 4341,
+ "time": 13.47
+ },
+ {
+ "epoch": 4.17,
+ "learning_rate": "1.0814e-04",
+ "loss": 0.6013,
+ "slid_loss": 0.5736,
+ "step": 4342,
+ "time": 13.41
+ },
+ {
+ "epoch": 4.17,
+ "learning_rate": "1.0813e-04",
+ "loss": 0.4509,
+ "slid_loss": 0.5723,
+ "step": 4343,
+ "time": 13.96
+ },
+ {
+ "epoch": 4.17,
+ "learning_rate": "1.0811e-04",
+ "loss": 0.5583,
+ "slid_loss": 0.5726,
+ "step": 4344,
+ "time": 11.31
+ },
+ {
+ "epoch": 4.17,
+ "learning_rate": "1.0809e-04",
+ "loss": 0.5654,
+ "slid_loss": 0.5728,
+ "step": 4345,
+ "time": 14.76
+ },
+ {
+ "epoch": 4.17,
+ "learning_rate": "1.0807e-04",
+ "loss": 0.6198,
+ "slid_loss": 0.5733,
+ "step": 4346,
+ "time": 12.85
+ },
+ {
+ "epoch": 4.18,
+ "learning_rate": "1.0805e-04",
+ "loss": 0.5075,
+ "slid_loss": 0.5723,
+ "step": 4347,
+ "time": 13.4
+ },
+ {
+ "epoch": 4.18,
+ "learning_rate": "1.0803e-04",
+ "loss": 0.5373,
+ "slid_loss": 0.5726,
+ "step": 4348,
+ "time": 12.27
+ },
+ {
+ "epoch": 4.18,
+ "learning_rate": "1.0802e-04",
+ "loss": 0.5579,
+ "slid_loss": 0.5732,
+ "step": 4349,
+ "time": 13.48
+ },
+ {
+ "epoch": 4.18,
+ "learning_rate": "1.0800e-04",
+ "loss": 0.5451,
+ "slid_loss": 0.5733,
+ "step": 4350,
+ "time": 13.55
+ },
+ {
+ "epoch": 4.18,
+ "learning_rate": "1.0798e-04",
+ "loss": 0.5267,
+ "slid_loss": 0.5725,
+ "step": 4351,
+ "time": 14.21
+ },
+ {
+ "epoch": 4.18,
+ "learning_rate": "1.0796e-04",
+ "loss": 0.4838,
+ "slid_loss": 0.5715,
+ "step": 4352,
+ "time": 14.48
+ },
+ {
+ "epoch": 4.18,
+ "learning_rate": "1.0794e-04",
+ "loss": 0.6804,
+ "slid_loss": 0.5714,
+ "step": 4353,
+ "time": 14.35
+ },
+ {
+ "epoch": 4.18,
+ "learning_rate": "1.0793e-04",
+ "loss": 0.5933,
+ "slid_loss": 0.5714,
+ "step": 4354,
+ "time": 13.36
+ },
+ {
+ "epoch": 4.18,
+ "learning_rate": "1.0791e-04",
+ "loss": 0.5706,
+ "slid_loss": 0.5712,
+ "step": 4355,
+ "time": 13.21
+ },
+ {
+ "epoch": 4.18,
+ "learning_rate": "1.0789e-04",
+ "loss": 0.6045,
+ "slid_loss": 0.5714,
+ "step": 4356,
+ "time": 14.15
+ },
+ {
+ "epoch": 4.19,
+ "learning_rate": "1.0787e-04",
+ "loss": 0.5503,
+ "slid_loss": 0.5718,
+ "step": 4357,
+ "time": 13.2
+ },
+ {
+ "epoch": 4.19,
+ "learning_rate": "1.0785e-04",
+ "loss": 0.6375,
+ "slid_loss": 0.5721,
+ "step": 4358,
+ "time": 12.97
+ },
+ {
+ "epoch": 4.19,
+ "learning_rate": "1.0784e-04",
+ "loss": 0.4895,
+ "slid_loss": 0.5709,
+ "step": 4359,
+ "time": 12.8
+ },
+ {
+ "epoch": 4.19,
+ "learning_rate": "1.0782e-04",
+ "loss": 0.5802,
+ "slid_loss": 0.5714,
+ "step": 4360,
+ "time": 12.95
+ },
+ {
+ "epoch": 4.19,
+ "learning_rate": "1.0780e-04",
+ "loss": 0.6682,
+ "slid_loss": 0.5722,
+ "step": 4361,
+ "time": 12.8
+ },
+ {
+ "epoch": 4.19,
+ "learning_rate": "1.0778e-04",
+ "loss": 0.552,
+ "slid_loss": 0.5724,
+ "step": 4362,
+ "time": 12.46
+ },
+ {
+ "epoch": 4.19,
+ "learning_rate": "1.0776e-04",
+ "loss": 0.534,
+ "slid_loss": 0.5726,
+ "step": 4363,
+ "time": 13.78
+ },
+ {
+ "epoch": 4.19,
+ "learning_rate": "1.0775e-04",
+ "loss": 0.619,
+ "slid_loss": 0.5729,
+ "step": 4364,
+ "time": 12.87
+ },
+ {
+ "epoch": 4.19,
+ "learning_rate": "1.0773e-04",
+ "loss": 0.5705,
+ "slid_loss": 0.5728,
+ "step": 4365,
+ "time": 14.74
+ },
+ {
+ "epoch": 4.19,
+ "learning_rate": "1.0771e-04",
+ "loss": 0.5997,
+ "slid_loss": 0.5729,
+ "step": 4366,
+ "time": 12.9
+ },
+ {
+ "epoch": 4.2,
+ "learning_rate": "1.0769e-04",
+ "loss": 0.5418,
+ "slid_loss": 0.5723,
+ "step": 4367,
+ "time": 11.29
+ },
+ {
+ "epoch": 4.2,
+ "learning_rate": "1.0767e-04",
+ "loss": 0.5783,
+ "slid_loss": 0.5726,
+ "step": 4368,
+ "time": 12.84
+ },
+ {
+ "epoch": 4.2,
+ "learning_rate": "1.0766e-04",
+ "loss": 0.5343,
+ "slid_loss": 0.5718,
+ "step": 4369,
+ "time": 12.75
+ },
+ {
+ "epoch": 4.2,
+ "learning_rate": "1.0764e-04",
+ "loss": 0.5112,
+ "slid_loss": 0.5711,
+ "step": 4370,
+ "time": 13.89
+ },
+ {
+ "epoch": 4.2,
+ "learning_rate": "1.0762e-04",
+ "loss": 0.5011,
+ "slid_loss": 0.5704,
+ "step": 4371,
+ "time": 12.78
+ },
+ {
+ "epoch": 4.2,
+ "learning_rate": "1.0760e-04",
+ "loss": 0.6308,
+ "slid_loss": 0.571,
+ "step": 4372,
+ "time": 13.2
+ },
+ {
+ "epoch": 4.2,
+ "learning_rate": "1.0758e-04",
+ "loss": 0.541,
+ "slid_loss": 0.5708,
+ "step": 4373,
+ "time": 13.99
+ },
+ {
+ "epoch": 4.2,
+ "learning_rate": "1.0757e-04",
+ "loss": 0.5747,
+ "slid_loss": 0.57,
+ "step": 4374,
+ "time": 13.53
+ },
+ {
+ "epoch": 4.2,
+ "learning_rate": "1.0755e-04",
+ "loss": 0.5601,
+ "slid_loss": 0.5696,
+ "step": 4375,
+ "time": 13.21
+ },
+ {
+ "epoch": 4.2,
+ "learning_rate": "1.0753e-04",
+ "loss": 0.5634,
+ "slid_loss": 0.5697,
+ "step": 4376,
+ "time": 12.84
+ },
+ {
+ "epoch": 4.2,
+ "learning_rate": "1.0751e-04",
+ "loss": 0.4931,
+ "slid_loss": 0.568,
+ "step": 4377,
+ "time": 11.92
+ },
+ {
+ "epoch": 4.21,
+ "learning_rate": "1.0750e-04",
+ "loss": 0.6246,
+ "slid_loss": 0.5681,
+ "step": 4378,
+ "time": 13.85
+ },
+ {
+ "epoch": 4.21,
+ "learning_rate": "1.0748e-04",
+ "loss": 0.5722,
+ "slid_loss": 0.5694,
+ "step": 4379,
+ "time": 12.86
+ },
+ {
+ "epoch": 4.21,
+ "learning_rate": "1.0746e-04",
+ "loss": 0.5502,
+ "slid_loss": 0.5692,
+ "step": 4380,
+ "time": 13.3
+ },
+ {
+ "epoch": 4.21,
+ "learning_rate": "1.0744e-04",
+ "loss": 0.5715,
+ "slid_loss": 0.5687,
+ "step": 4381,
+ "time": 11.64
+ },
+ {
+ "epoch": 4.21,
+ "learning_rate": "1.0743e-04",
+ "loss": 0.5815,
+ "slid_loss": 0.5696,
+ "step": 4382,
+ "time": 12.74
+ },
+ {
+ "epoch": 4.21,
+ "learning_rate": "1.0741e-04",
+ "loss": 0.7127,
+ "slid_loss": 0.5707,
+ "step": 4383,
+ "time": 13.06
+ },
+ {
+ "epoch": 4.21,
+ "learning_rate": "1.0739e-04",
+ "loss": 0.6869,
+ "slid_loss": 0.5727,
+ "step": 4384,
+ "time": 14.21
+ },
+ {
+ "epoch": 4.21,
+ "learning_rate": "1.0737e-04",
+ "loss": 0.6397,
+ "slid_loss": 0.5722,
+ "step": 4385,
+ "time": 13.11
+ },
+ {
+ "epoch": 4.21,
+ "learning_rate": "1.0736e-04",
+ "loss": 0.5562,
+ "slid_loss": 0.572,
+ "step": 4386,
+ "time": 14.08
+ },
+ {
+ "epoch": 4.21,
+ "learning_rate": "1.0734e-04",
+ "loss": 0.5471,
+ "slid_loss": 0.5711,
+ "step": 4387,
+ "time": 13.17
+ },
+ {
+ "epoch": 4.22,
+ "learning_rate": "1.0732e-04",
+ "loss": 0.5869,
+ "slid_loss": 0.5712,
+ "step": 4388,
+ "time": 13.48
+ },
+ {
+ "epoch": 4.22,
+ "learning_rate": "1.0730e-04",
+ "loss": 0.5595,
+ "slid_loss": 0.5715,
+ "step": 4389,
+ "time": 14.19
+ },
+ {
+ "epoch": 4.22,
+ "learning_rate": "1.0729e-04",
+ "loss": 0.5296,
+ "slid_loss": 0.5716,
+ "step": 4390,
+ "time": 13.65
+ },
+ {
+ "epoch": 4.22,
+ "learning_rate": "1.0727e-04",
+ "loss": 0.4787,
+ "slid_loss": 0.5701,
+ "step": 4391,
+ "time": 12.15
+ },
+ {
+ "epoch": 4.22,
+ "learning_rate": "1.0725e-04",
+ "loss": 0.5321,
+ "slid_loss": 0.57,
+ "step": 4392,
+ "time": 12.07
+ },
+ {
+ "epoch": 4.22,
+ "learning_rate": "1.0723e-04",
+ "loss": 0.5056,
+ "slid_loss": 0.5692,
+ "step": 4393,
+ "time": 13.65
+ },
+ {
+ "epoch": 4.22,
+ "learning_rate": "1.0722e-04",
+ "loss": 0.5397,
+ "slid_loss": 0.5686,
+ "step": 4394,
+ "time": 13.64
+ },
+ {
+ "epoch": 4.22,
+ "learning_rate": "1.0720e-04",
+ "loss": 0.553,
+ "slid_loss": 0.5692,
+ "step": 4395,
+ "time": 13.83
+ },
+ {
+ "epoch": 4.22,
+ "learning_rate": "1.0718e-04",
+ "loss": 0.6374,
+ "slid_loss": 0.5693,
+ "step": 4396,
+ "time": 13.52
+ },
+ {
+ "epoch": 4.22,
+ "learning_rate": "1.0716e-04",
+ "loss": 0.4997,
+ "slid_loss": 0.5687,
+ "step": 4397,
+ "time": 11.85
+ },
+ {
+ "epoch": 4.22,
+ "learning_rate": "1.0715e-04",
+ "loss": 0.6042,
+ "slid_loss": 0.5693,
+ "step": 4398,
+ "time": 13.94
+ },
+ {
+ "epoch": 4.23,
+ "learning_rate": "1.0713e-04",
+ "loss": 0.4681,
+ "slid_loss": 0.5682,
+ "step": 4399,
+ "time": 13.07
+ },
+ {
+ "epoch": 4.23,
+ "learning_rate": "1.0711e-04",
+ "loss": 0.5529,
+ "slid_loss": 0.5676,
+ "step": 4400,
+ "time": 12.82
+ },
+ {
+ "epoch": 4.23,
+ "learning_rate": "1.0710e-04",
+ "loss": 0.5447,
+ "slid_loss": 0.5672,
+ "step": 4401,
+ "time": 13.32
+ },
+ {
+ "epoch": 4.23,
+ "learning_rate": "1.0708e-04",
+ "loss": 0.5756,
+ "slid_loss": 0.567,
+ "step": 4402,
+ "time": 13.92
+ },
+ {
+ "epoch": 4.23,
+ "learning_rate": "1.0706e-04",
+ "loss": 0.5784,
+ "slid_loss": 0.5674,
+ "step": 4403,
+ "time": 13.82
+ },
+ {
+ "epoch": 4.23,
+ "learning_rate": "1.0704e-04",
+ "loss": 0.5788,
+ "slid_loss": 0.5675,
+ "step": 4404,
+ "time": 12.69
+ },
+ {
+ "epoch": 4.23,
+ "learning_rate": "1.0703e-04",
+ "loss": 0.5963,
+ "slid_loss": 0.5678,
+ "step": 4405,
+ "time": 12.97
+ },
+ {
+ "epoch": 4.23,
+ "learning_rate": "1.0701e-04",
+ "loss": 0.5728,
+ "slid_loss": 0.5688,
+ "step": 4406,
+ "time": 13.77
+ },
+ {
+ "epoch": 4.23,
+ "learning_rate": "1.0699e-04",
+ "loss": 0.5906,
+ "slid_loss": 0.5688,
+ "step": 4407,
+ "time": 13.39
+ },
+ {
+ "epoch": 4.23,
+ "learning_rate": "1.0698e-04",
+ "loss": 0.5218,
+ "slid_loss": 0.568,
+ "step": 4408,
+ "time": 13.74
+ },
+ {
+ "epoch": 4.24,
+ "learning_rate": "1.0696e-04",
+ "loss": 0.5883,
+ "slid_loss": 0.5681,
+ "step": 4409,
+ "time": 13.06
+ },
+ {
+ "epoch": 4.24,
+ "learning_rate": "1.0694e-04",
+ "loss": 0.5248,
+ "slid_loss": 0.5671,
+ "step": 4410,
+ "time": 10.85
+ },
+ {
+ "epoch": 4.24,
+ "learning_rate": "1.0692e-04",
+ "loss": 0.5462,
+ "slid_loss": 0.5672,
+ "step": 4411,
+ "time": 12.77
+ },
+ {
+ "epoch": 4.24,
+ "learning_rate": "1.0691e-04",
+ "loss": 0.5649,
+ "slid_loss": 0.5673,
+ "step": 4412,
+ "time": 13.36
+ },
+ {
+ "epoch": 4.24,
+ "learning_rate": "1.0689e-04",
+ "loss": 0.59,
+ "slid_loss": 0.5666,
+ "step": 4413,
+ "time": 12.67
+ },
+ {
+ "epoch": 4.24,
+ "learning_rate": "1.0687e-04",
+ "loss": 0.5554,
+ "slid_loss": 0.5672,
+ "step": 4414,
+ "time": 13.92
+ },
+ {
+ "epoch": 4.24,
+ "learning_rate": "1.0686e-04",
+ "loss": 0.5741,
+ "slid_loss": 0.5675,
+ "step": 4415,
+ "time": 13.47
+ },
+ {
+ "epoch": 4.24,
+ "learning_rate": "1.0684e-04",
+ "loss": 0.5373,
+ "slid_loss": 0.5673,
+ "step": 4416,
+ "time": 11.59
+ },
+ {
+ "epoch": 4.24,
+ "learning_rate": "1.0682e-04",
+ "loss": 0.523,
+ "slid_loss": 0.5669,
+ "step": 4417,
+ "time": 11.37
+ },
+ {
+ "epoch": 4.24,
+ "learning_rate": "1.0681e-04",
+ "loss": 0.565,
+ "slid_loss": 0.5674,
+ "step": 4418,
+ "time": 11.39
+ },
+ {
+ "epoch": 4.24,
+ "learning_rate": "1.0679e-04",
+ "loss": 0.5234,
+ "slid_loss": 0.5677,
+ "step": 4419,
+ "time": 13.39
+ },
+ {
+ "epoch": 4.25,
+ "learning_rate": "1.0677e-04",
+ "loss": 0.5417,
+ "slid_loss": 0.5676,
+ "step": 4420,
+ "time": 10.87
+ },
+ {
+ "epoch": 4.25,
+ "learning_rate": "1.0675e-04",
+ "loss": 0.5772,
+ "slid_loss": 0.5678,
+ "step": 4421,
+ "time": 11.54
+ },
+ {
+ "epoch": 4.25,
+ "learning_rate": "1.0674e-04",
+ "loss": 0.5095,
+ "slid_loss": 0.5674,
+ "step": 4422,
+ "time": 13.79
+ },
+ {
+ "epoch": 4.25,
+ "learning_rate": "1.0672e-04",
+ "loss": 0.5318,
+ "slid_loss": 0.5661,
+ "step": 4423,
+ "time": 12.64
+ },
+ {
+ "epoch": 4.25,
+ "learning_rate": "1.0670e-04",
+ "loss": 0.5761,
+ "slid_loss": 0.5671,
+ "step": 4424,
+ "time": 13.66
+ },
+ {
+ "epoch": 4.25,
+ "learning_rate": "1.0669e-04",
+ "loss": 0.5633,
+ "slid_loss": 0.5666,
+ "step": 4425,
+ "time": 13.37
+ },
+ {
+ "epoch": 4.25,
+ "learning_rate": "1.0667e-04",
+ "loss": 0.5,
+ "slid_loss": 0.5662,
+ "step": 4426,
+ "time": 13.65
+ },
+ {
+ "epoch": 4.25,
+ "learning_rate": "1.0665e-04",
+ "loss": 0.5033,
+ "slid_loss": 0.5657,
+ "step": 4427,
+ "time": 13.82
+ },
+ {
+ "epoch": 4.25,
+ "learning_rate": "1.0664e-04",
+ "loss": 0.5214,
+ "slid_loss": 0.5651,
+ "step": 4428,
+ "time": 12.88
+ },
+ {
+ "epoch": 4.25,
+ "learning_rate": "1.0662e-04",
+ "loss": 0.5183,
+ "slid_loss": 0.5647,
+ "step": 4429,
+ "time": 13.27
+ },
+ {
+ "epoch": 4.26,
+ "learning_rate": "1.0660e-04",
+ "loss": 0.6519,
+ "slid_loss": 0.5654,
+ "step": 4430,
+ "time": 12.7
+ },
+ {
+ "epoch": 4.26,
+ "learning_rate": "1.0659e-04",
+ "loss": 0.6079,
+ "slid_loss": 0.5659,
+ "step": 4431,
+ "time": 12.77
+ },
+ {
+ "epoch": 4.26,
+ "learning_rate": "1.0657e-04",
+ "loss": 0.59,
+ "slid_loss": 0.5653,
+ "step": 4432,
+ "time": 13.91
+ },
+ {
+ "epoch": 4.26,
+ "learning_rate": "1.0655e-04",
+ "loss": 0.6267,
+ "slid_loss": 0.5655,
+ "step": 4433,
+ "time": 14.08
+ },
+ {
+ "epoch": 4.26,
+ "learning_rate": "1.0654e-04",
+ "loss": 0.5343,
+ "slid_loss": 0.5648,
+ "step": 4434,
+ "time": 13.32
+ },
+ {
+ "epoch": 4.26,
+ "learning_rate": "1.0652e-04",
+ "loss": 0.5887,
+ "slid_loss": 0.565,
+ "step": 4435,
+ "time": 12.92
+ },
+ {
+ "epoch": 4.26,
+ "learning_rate": "1.0650e-04",
+ "loss": 0.5544,
+ "slid_loss": 0.5645,
+ "step": 4436,
+ "time": 13.52
+ },
+ {
+ "epoch": 4.26,
+ "learning_rate": "1.0649e-04",
+ "loss": 0.5707,
+ "slid_loss": 0.5639,
+ "step": 4437,
+ "time": 12.46
+ },
+ {
+ "epoch": 4.26,
+ "learning_rate": "1.0647e-04",
+ "loss": 0.6036,
+ "slid_loss": 0.5644,
+ "step": 4438,
+ "time": 13.51
+ },
+ {
+ "epoch": 4.26,
+ "learning_rate": "1.0645e-04",
+ "loss": 0.6265,
+ "slid_loss": 0.5653,
+ "step": 4439,
+ "time": 14.18
+ },
+ {
+ "epoch": 4.27,
+ "learning_rate": "1.0644e-04",
+ "loss": 0.5398,
+ "slid_loss": 0.5639,
+ "step": 4440,
+ "time": 12.97
+ },
+ {
+ "epoch": 4.27,
+ "learning_rate": "1.0642e-04",
+ "loss": 0.5426,
+ "slid_loss": 0.5635,
+ "step": 4441,
+ "time": 13.14
+ },
+ {
+ "epoch": 4.27,
+ "learning_rate": "1.0641e-04",
+ "loss": 0.4603,
+ "slid_loss": 0.5621,
+ "step": 4442,
+ "time": 13.33
+ },
+ {
+ "epoch": 4.27,
+ "learning_rate": "1.0639e-04",
+ "loss": 0.5677,
+ "slid_loss": 0.5633,
+ "step": 4443,
+ "time": 12.77
+ },
+ {
+ "epoch": 4.27,
+ "learning_rate": "1.0637e-04",
+ "loss": 0.507,
+ "slid_loss": 0.5628,
+ "step": 4444,
+ "time": 13.76
+ },
+ {
+ "epoch": 4.27,
+ "learning_rate": "1.0636e-04",
+ "loss": 0.5395,
+ "slid_loss": 0.5625,
+ "step": 4445,
+ "time": 13.72
+ },
+ {
+ "epoch": 4.27,
+ "learning_rate": "1.0634e-04",
+ "loss": 0.6298,
+ "slid_loss": 0.5626,
+ "step": 4446,
+ "time": 13.35
+ },
+ {
+ "epoch": 4.27,
+ "learning_rate": "1.0632e-04",
+ "loss": 0.5373,
+ "slid_loss": 0.5629,
+ "step": 4447,
+ "time": 13.82
+ },
+ {
+ "epoch": 4.27,
+ "learning_rate": "1.0631e-04",
+ "loss": 0.5597,
+ "slid_loss": 0.5632,
+ "step": 4448,
+ "time": 12.44
+ },
+ {
+ "epoch": 4.27,
+ "learning_rate": "1.0629e-04",
+ "loss": 0.646,
+ "slid_loss": 0.564,
+ "step": 4449,
+ "time": 12.87
+ },
+ {
+ "epoch": 4.27,
+ "learning_rate": "1.0627e-04",
+ "loss": 0.5208,
+ "slid_loss": 0.5638,
+ "step": 4450,
+ "time": 12.86
+ },
+ {
+ "epoch": 4.28,
+ "learning_rate": "1.0626e-04",
+ "loss": 0.5568,
+ "slid_loss": 0.5641,
+ "step": 4451,
+ "time": 12.29
+ },
+ {
+ "epoch": 4.28,
+ "learning_rate": "1.0624e-04",
+ "loss": 0.5771,
+ "slid_loss": 0.565,
+ "step": 4452,
+ "time": 12.08
+ },
+ {
+ "epoch": 4.28,
+ "learning_rate": "1.0623e-04",
+ "loss": 0.5579,
+ "slid_loss": 0.5638,
+ "step": 4453,
+ "time": 13.14
+ },
+ {
+ "epoch": 4.28,
+ "learning_rate": "1.0621e-04",
+ "loss": 0.5298,
+ "slid_loss": 0.5632,
+ "step": 4454,
+ "time": 14.23
+ },
+ {
+ "epoch": 4.28,
+ "learning_rate": "1.0619e-04",
+ "loss": 0.5334,
+ "slid_loss": 0.5628,
+ "step": 4455,
+ "time": 11.79
+ },
+ {
+ "epoch": 4.28,
+ "learning_rate": "1.0618e-04",
+ "loss": 0.6166,
+ "slid_loss": 0.5629,
+ "step": 4456,
+ "time": 12.52
+ },
+ {
+ "epoch": 4.28,
+ "learning_rate": "1.0616e-04",
+ "loss": 0.6536,
+ "slid_loss": 0.564,
+ "step": 4457,
+ "time": 13.02
+ },
+ {
+ "epoch": 4.28,
+ "learning_rate": "1.0615e-04",
+ "loss": 0.5476,
+ "slid_loss": 0.5631,
+ "step": 4458,
+ "time": 12.34
+ },
+ {
+ "epoch": 4.28,
+ "learning_rate": "1.0613e-04",
+ "loss": 0.4697,
+ "slid_loss": 0.5629,
+ "step": 4459,
+ "time": 13.13
+ },
+ {
+ "epoch": 4.28,
+ "learning_rate": "1.0611e-04",
+ "loss": 0.5542,
+ "slid_loss": 0.5626,
+ "step": 4460,
+ "time": 11.71
+ },
+ {
+ "epoch": 4.29,
+ "learning_rate": "1.0610e-04",
+ "loss": 0.4617,
+ "slid_loss": 0.5605,
+ "step": 4461,
+ "time": 13.2
+ },
+ {
+ "epoch": 4.29,
+ "learning_rate": "1.0608e-04",
+ "loss": 0.6238,
+ "slid_loss": 0.5612,
+ "step": 4462,
+ "time": 11.9
+ },
+ {
+ "epoch": 4.29,
+ "learning_rate": "1.0607e-04",
+ "loss": 0.5374,
+ "slid_loss": 0.5613,
+ "step": 4463,
+ "time": 13.44
+ },
+ {
+ "epoch": 4.29,
+ "learning_rate": "1.0605e-04",
+ "loss": 0.4013,
+ "slid_loss": 0.5591,
+ "step": 4464,
+ "time": 12.15
+ },
+ {
+ "epoch": 4.29,
+ "learning_rate": "1.0603e-04",
+ "loss": 0.4663,
+ "slid_loss": 0.5581,
+ "step": 4465,
+ "time": 13.56
+ },
+ {
+ "epoch": 4.29,
+ "learning_rate": "1.0602e-04",
+ "loss": 0.5516,
+ "slid_loss": 0.5576,
+ "step": 4466,
+ "time": 13.04
+ },
+ {
+ "epoch": 4.29,
+ "learning_rate": "1.0600e-04",
+ "loss": 0.5523,
+ "slid_loss": 0.5577,
+ "step": 4467,
+ "time": 12.39
+ },
+ {
+ "epoch": 4.29,
+ "learning_rate": "1.0599e-04",
+ "loss": 0.5163,
+ "slid_loss": 0.5571,
+ "step": 4468,
+ "time": 12.54
+ },
+ {
+ "epoch": 4.29,
+ "learning_rate": "1.0597e-04",
+ "loss": 0.6144,
+ "slid_loss": 0.5579,
+ "step": 4469,
+ "time": 14.16
+ },
+ {
+ "epoch": 4.29,
+ "learning_rate": "1.0595e-04",
+ "loss": 0.6424,
+ "slid_loss": 0.5592,
+ "step": 4470,
+ "time": 14.28
+ },
+ {
+ "epoch": 4.29,
+ "learning_rate": "1.0594e-04",
+ "loss": 0.5203,
+ "slid_loss": 0.5594,
+ "step": 4471,
+ "time": 13.97
+ },
+ {
+ "epoch": 4.3,
+ "learning_rate": "1.0592e-04",
+ "loss": 0.5061,
+ "slid_loss": 0.5581,
+ "step": 4472,
+ "time": 13.68
+ },
+ {
+ "epoch": 4.3,
+ "learning_rate": "1.0591e-04",
+ "loss": 0.5977,
+ "slid_loss": 0.5587,
+ "step": 4473,
+ "time": 12.78
+ },
+ {
+ "epoch": 4.3,
+ "learning_rate": "1.0589e-04",
+ "loss": 0.5824,
+ "slid_loss": 0.5588,
+ "step": 4474,
+ "time": 13.47
+ },
+ {
+ "epoch": 4.3,
+ "learning_rate": "1.0587e-04",
+ "loss": 0.611,
+ "slid_loss": 0.5593,
+ "step": 4475,
+ "time": 12.87
+ },
+ {
+ "epoch": 4.3,
+ "learning_rate": "1.0586e-04",
+ "loss": 0.5908,
+ "slid_loss": 0.5596,
+ "step": 4476,
+ "time": 12.91
+ },
+ {
+ "epoch": 4.3,
+ "learning_rate": "1.0584e-04",
+ "loss": 0.558,
+ "slid_loss": 0.5602,
+ "step": 4477,
+ "time": 13.82
+ },
+ {
+ "epoch": 4.3,
+ "learning_rate": "1.0583e-04",
+ "loss": 0.6553,
+ "slid_loss": 0.5605,
+ "step": 4478,
+ "time": 13.75
+ },
+ {
+ "epoch": 4.3,
+ "learning_rate": "1.0581e-04",
+ "loss": 0.5998,
+ "slid_loss": 0.5608,
+ "step": 4479,
+ "time": 11.89
+ },
+ {
+ "epoch": 4.3,
+ "learning_rate": "1.0580e-04",
+ "loss": 0.4902,
+ "slid_loss": 0.5602,
+ "step": 4480,
+ "time": 12.78
+ },
+ {
+ "epoch": 4.3,
+ "learning_rate": "1.0578e-04",
+ "loss": 0.5863,
+ "slid_loss": 0.5603,
+ "step": 4481,
+ "time": 13.96
+ },
+ {
+ "epoch": 4.31,
+ "learning_rate": "1.0576e-04",
+ "loss": 0.5728,
+ "slid_loss": 0.5602,
+ "step": 4482,
+ "time": 11.09
+ },
+ {
+ "epoch": 4.31,
+ "learning_rate": "1.0575e-04",
+ "loss": 0.4727,
+ "slid_loss": 0.5578,
+ "step": 4483,
+ "time": 13.49
+ },
+ {
+ "epoch": 4.31,
+ "learning_rate": "1.0573e-04",
+ "loss": 0.5703,
+ "slid_loss": 0.5567,
+ "step": 4484,
+ "time": 14.15
+ },
+ {
+ "epoch": 4.31,
+ "learning_rate": "1.0572e-04",
+ "loss": 0.5005,
+ "slid_loss": 0.5553,
+ "step": 4485,
+ "time": 12.82
+ },
+ {
+ "epoch": 4.31,
+ "learning_rate": "1.0570e-04",
+ "loss": 0.5055,
+ "slid_loss": 0.5548,
+ "step": 4486,
+ "time": 13.54
+ },
+ {
+ "epoch": 4.31,
+ "learning_rate": "1.0569e-04",
+ "loss": 0.4682,
+ "slid_loss": 0.554,
+ "step": 4487,
+ "time": 13.31
+ },
+ {
+ "epoch": 4.31,
+ "learning_rate": "1.0567e-04",
+ "loss": 0.562,
+ "slid_loss": 0.5537,
+ "step": 4488,
+ "time": 13.75
+ },
+ {
+ "epoch": 4.31,
+ "learning_rate": "1.0566e-04",
+ "loss": 0.5304,
+ "slid_loss": 0.5534,
+ "step": 4489,
+ "time": 13.0
+ },
+ {
+ "epoch": 4.31,
+ "learning_rate": "1.0564e-04",
+ "loss": 0.5081,
+ "slid_loss": 0.5532,
+ "step": 4490,
+ "time": 13.04
+ },
+ {
+ "epoch": 4.31,
+ "learning_rate": "1.0562e-04",
+ "loss": 0.5458,
+ "slid_loss": 0.5539,
+ "step": 4491,
+ "time": 13.76
+ },
+ {
+ "epoch": 4.32,
+ "learning_rate": "1.0561e-04",
+ "loss": 0.5107,
+ "slid_loss": 0.5537,
+ "step": 4492,
+ "time": 13.26
+ },
+ {
+ "epoch": 4.32,
+ "learning_rate": "1.0559e-04",
+ "loss": 0.5098,
+ "slid_loss": 0.5537,
+ "step": 4493,
+ "time": 13.72
+ },
+ {
+ "epoch": 4.32,
+ "learning_rate": "1.0558e-04",
+ "loss": 0.6095,
+ "slid_loss": 0.5544,
+ "step": 4494,
+ "time": 12.45
+ },
+ {
+ "epoch": 4.32,
+ "learning_rate": "1.0556e-04",
+ "loss": 0.5212,
+ "slid_loss": 0.5541,
+ "step": 4495,
+ "time": 11.89
+ },
+ {
+ "epoch": 4.32,
+ "learning_rate": "1.0555e-04",
+ "loss": 0.5963,
+ "slid_loss": 0.5537,
+ "step": 4496,
+ "time": 13.42
+ },
+ {
+ "epoch": 4.32,
+ "learning_rate": "1.0553e-04",
+ "loss": 0.592,
+ "slid_loss": 0.5546,
+ "step": 4497,
+ "time": 13.27
+ },
+ {
+ "epoch": 4.32,
+ "learning_rate": "1.0552e-04",
+ "loss": 0.6565,
+ "slid_loss": 0.5551,
+ "step": 4498,
+ "time": 13.48
+ },
+ {
+ "epoch": 4.32,
+ "learning_rate": "1.0550e-04",
+ "loss": 0.5105,
+ "slid_loss": 0.5556,
+ "step": 4499,
+ "time": 11.91
+ },
+ {
+ "epoch": 4.32,
+ "learning_rate": "1.0549e-04",
+ "loss": 0.5513,
+ "slid_loss": 0.5556,
+ "step": 4500,
+ "time": 14.29
+ },
+ {
+ "epoch": 4.32,
+ "learning_rate": "1.0547e-04",
+ "loss": 0.5206,
+ "slid_loss": 0.5553,
+ "step": 4501,
+ "time": 13.26
+ },
+ {
+ "epoch": 4.32,
+ "learning_rate": "1.0546e-04",
+ "loss": 0.5941,
+ "slid_loss": 0.5555,
+ "step": 4502,
+ "time": 13.95
+ },
+ {
+ "epoch": 4.33,
+ "learning_rate": "1.0544e-04",
+ "loss": 0.5431,
+ "slid_loss": 0.5551,
+ "step": 4503,
+ "time": 13.57
+ },
+ {
+ "epoch": 4.33,
+ "learning_rate": "1.0543e-04",
+ "loss": 0.6153,
+ "slid_loss": 0.5555,
+ "step": 4504,
+ "time": 13.29
+ },
+ {
+ "epoch": 4.33,
+ "learning_rate": "1.0541e-04",
+ "loss": 0.5241,
+ "slid_loss": 0.5548,
+ "step": 4505,
+ "time": 13.8
+ },
+ {
+ "epoch": 4.33,
+ "learning_rate": "1.0539e-04",
+ "loss": 0.6102,
+ "slid_loss": 0.5552,
+ "step": 4506,
+ "time": 11.06
+ },
+ {
+ "epoch": 4.33,
+ "learning_rate": "1.0538e-04",
+ "loss": 0.4648,
+ "slid_loss": 0.5539,
+ "step": 4507,
+ "time": 13.62
+ },
+ {
+ "epoch": 4.33,
+ "learning_rate": "1.0536e-04",
+ "loss": 0.6178,
+ "slid_loss": 0.5549,
+ "step": 4508,
+ "time": 13.56
+ },
+ {
+ "epoch": 4.33,
+ "learning_rate": "1.0535e-04",
+ "loss": 0.4409,
+ "slid_loss": 0.5534,
+ "step": 4509,
+ "time": 13.16
+ },
+ {
+ "epoch": 4.33,
+ "learning_rate": "1.0533e-04",
+ "loss": 0.554,
+ "slid_loss": 0.5537,
+ "step": 4510,
+ "time": 13.76
+ },
+ {
+ "epoch": 4.33,
+ "learning_rate": "1.0532e-04",
+ "loss": 0.4906,
+ "slid_loss": 0.5531,
+ "step": 4511,
+ "time": 12.69
+ },
+ {
+ "epoch": 4.33,
+ "learning_rate": "1.0530e-04",
+ "loss": 0.5694,
+ "slid_loss": 0.5532,
+ "step": 4512,
+ "time": 13.43
+ },
+ {
+ "epoch": 4.34,
+ "learning_rate": "1.0529e-04",
+ "loss": 0.5759,
+ "slid_loss": 0.553,
+ "step": 4513,
+ "time": 13.41
+ },
+ {
+ "epoch": 4.34,
+ "learning_rate": "1.0527e-04",
+ "loss": 0.5423,
+ "slid_loss": 0.5529,
+ "step": 4514,
+ "time": 13.4
+ },
+ {
+ "epoch": 4.34,
+ "learning_rate": "1.0526e-04",
+ "loss": 0.6554,
+ "slid_loss": 0.5537,
+ "step": 4515,
+ "time": 13.45
+ },
+ {
+ "epoch": 4.34,
+ "learning_rate": "1.0524e-04",
+ "loss": 0.561,
+ "slid_loss": 0.5539,
+ "step": 4516,
+ "time": 12.76
+ },
+ {
+ "epoch": 4.34,
+ "learning_rate": "1.0523e-04",
+ "loss": 0.5369,
+ "slid_loss": 0.5541,
+ "step": 4517,
+ "time": 14.18
+ },
+ {
+ "epoch": 4.34,
+ "learning_rate": "1.0521e-04",
+ "loss": 0.5693,
+ "slid_loss": 0.5541,
+ "step": 4518,
+ "time": 12.57
+ },
+ {
+ "epoch": 4.34,
+ "learning_rate": "1.0520e-04",
+ "loss": 0.621,
+ "slid_loss": 0.5551,
+ "step": 4519,
+ "time": 11.52
+ },
+ {
+ "epoch": 4.34,
+ "learning_rate": "1.0518e-04",
+ "loss": 0.5806,
+ "slid_loss": 0.5555,
+ "step": 4520,
+ "time": 12.83
+ },
+ {
+ "epoch": 4.34,
+ "learning_rate": "1.0517e-04",
+ "loss": 0.5694,
+ "slid_loss": 0.5554,
+ "step": 4521,
+ "time": 12.95
+ },
+ {
+ "epoch": 4.34,
+ "learning_rate": "1.0516e-04",
+ "loss": 0.553,
+ "slid_loss": 0.5559,
+ "step": 4522,
+ "time": 13.3
+ },
+ {
+ "epoch": 4.34,
+ "learning_rate": "1.0514e-04",
+ "loss": 0.4836,
+ "slid_loss": 0.5554,
+ "step": 4523,
+ "time": 14.27
+ },
+ {
+ "epoch": 4.35,
+ "learning_rate": "1.0513e-04",
+ "loss": 0.649,
+ "slid_loss": 0.5561,
+ "step": 4524,
+ "time": 11.86
+ },
+ {
+ "epoch": 4.35,
+ "learning_rate": "1.0511e-04",
+ "loss": 0.5419,
+ "slid_loss": 0.5559,
+ "step": 4525,
+ "time": 13.86
+ },
+ {
+ "epoch": 4.35,
+ "learning_rate": "1.0510e-04",
+ "loss": 0.5648,
+ "slid_loss": 0.5565,
+ "step": 4526,
+ "time": 13.26
+ },
+ {
+ "epoch": 4.35,
+ "learning_rate": "1.0508e-04",
+ "loss": 0.527,
+ "slid_loss": 0.5568,
+ "step": 4527,
+ "time": 13.78
+ },
+ {
+ "epoch": 4.35,
+ "learning_rate": "1.0507e-04",
+ "loss": 0.5219,
+ "slid_loss": 0.5568,
+ "step": 4528,
+ "time": 14.12
+ },
+ {
+ "epoch": 4.35,
+ "learning_rate": "1.0505e-04",
+ "loss": 0.673,
+ "slid_loss": 0.5583,
+ "step": 4529,
+ "time": 13.59
+ },
+ {
+ "epoch": 4.35,
+ "learning_rate": "1.0504e-04",
+ "loss": 0.5212,
+ "slid_loss": 0.557,
+ "step": 4530,
+ "time": 13.93
+ },
+ {
+ "epoch": 4.35,
+ "learning_rate": "1.0502e-04",
+ "loss": 0.5785,
+ "slid_loss": 0.5567,
+ "step": 4531,
+ "time": 12.87
+ },
+ {
+ "epoch": 4.35,
+ "learning_rate": "1.0501e-04",
+ "loss": 0.5516,
+ "slid_loss": 0.5563,
+ "step": 4532,
+ "time": 13.73
+ },
+ {
+ "epoch": 4.35,
+ "learning_rate": "1.0499e-04",
+ "loss": 0.5774,
+ "slid_loss": 0.5558,
+ "step": 4533,
+ "time": 13.38
+ },
+ {
+ "epoch": 4.36,
+ "learning_rate": "1.0498e-04",
+ "loss": 0.5418,
+ "slid_loss": 0.5559,
+ "step": 4534,
+ "time": 12.9
+ },
+ {
+ "epoch": 4.36,
+ "learning_rate": "1.0496e-04",
+ "loss": 0.5454,
+ "slid_loss": 0.5555,
+ "step": 4535,
+ "time": 13.45
+ },
+ {
+ "epoch": 4.36,
+ "learning_rate": "1.0495e-04",
+ "loss": 0.6359,
+ "slid_loss": 0.5563,
+ "step": 4536,
+ "time": 13.23
+ },
+ {
+ "epoch": 4.36,
+ "learning_rate": "1.0493e-04",
+ "loss": 0.5451,
+ "slid_loss": 0.556,
+ "step": 4537,
+ "time": 11.88
+ },
+ {
+ "epoch": 4.36,
+ "learning_rate": "1.0492e-04",
+ "loss": 0.5623,
+ "slid_loss": 0.5556,
+ "step": 4538,
+ "time": 13.51
+ },
+ {
+ "epoch": 4.36,
+ "learning_rate": "1.0491e-04",
+ "loss": 0.4879,
+ "slid_loss": 0.5542,
+ "step": 4539,
+ "time": 14.21
+ },
+ {
+ "epoch": 4.36,
+ "learning_rate": "1.0489e-04",
+ "loss": 0.4647,
+ "slid_loss": 0.5535,
+ "step": 4540,
+ "time": 10.9
+ },
+ {
+ "epoch": 4.36,
+ "learning_rate": "1.0488e-04",
+ "loss": 0.5222,
+ "slid_loss": 0.5533,
+ "step": 4541,
+ "time": 13.03
+ },
+ {
+ "epoch": 4.36,
+ "learning_rate": "1.0486e-04",
+ "loss": 0.6058,
+ "slid_loss": 0.5547,
+ "step": 4542,
+ "time": 11.9
+ },
+ {
+ "epoch": 4.36,
+ "learning_rate": "1.0485e-04",
+ "loss": 0.6018,
+ "slid_loss": 0.5551,
+ "step": 4543,
+ "time": 13.03
+ },
+ {
+ "epoch": 4.37,
+ "learning_rate": "1.0483e-04",
+ "loss": 0.5497,
+ "slid_loss": 0.5555,
+ "step": 4544,
+ "time": 11.65
+ },
+ {
+ "epoch": 4.37,
+ "learning_rate": "1.0482e-04",
+ "loss": 0.5423,
+ "slid_loss": 0.5555,
+ "step": 4545,
+ "time": 13.22
+ },
+ {
+ "epoch": 4.37,
+ "learning_rate": "1.0481e-04",
+ "loss": 0.5609,
+ "slid_loss": 0.5549,
+ "step": 4546,
+ "time": 12.83
+ },
+ {
+ "epoch": 4.37,
+ "learning_rate": "1.0479e-04",
+ "loss": 0.4974,
+ "slid_loss": 0.5545,
+ "step": 4547,
+ "time": 12.37
+ },
+ {
+ "epoch": 4.37,
+ "learning_rate": "1.0478e-04",
+ "loss": 0.4871,
+ "slid_loss": 0.5537,
+ "step": 4548,
+ "time": 13.54
+ },
+ {
+ "epoch": 4.37,
+ "learning_rate": "1.0476e-04",
+ "loss": 0.5857,
+ "slid_loss": 0.5531,
+ "step": 4549,
+ "time": 12.77
+ },
+ {
+ "epoch": 4.37,
+ "learning_rate": "1.0475e-04",
+ "loss": 0.5199,
+ "slid_loss": 0.5531,
+ "step": 4550,
+ "time": 12.12
+ },
+ {
+ "epoch": 4.37,
+ "learning_rate": "1.0473e-04",
+ "loss": 0.5687,
+ "slid_loss": 0.5532,
+ "step": 4551,
+ "time": 13.45
+ },
+ {
+ "epoch": 4.37,
+ "learning_rate": "1.0472e-04",
+ "loss": 0.5518,
+ "slid_loss": 0.553,
+ "step": 4552,
+ "time": 11.91
+ },
+ {
+ "epoch": 4.37,
+ "learning_rate": "1.0471e-04",
+ "loss": 0.5513,
+ "slid_loss": 0.5529,
+ "step": 4553,
+ "time": 12.69
+ },
+ {
+ "epoch": 4.37,
+ "learning_rate": "1.0469e-04",
+ "loss": 0.6022,
+ "slid_loss": 0.5536,
+ "step": 4554,
+ "time": 13.21
+ },
+ {
+ "epoch": 4.38,
+ "learning_rate": "1.0468e-04",
+ "loss": 0.5103,
+ "slid_loss": 0.5534,
+ "step": 4555,
+ "time": 13.64
+ },
+ {
+ "epoch": 4.38,
+ "learning_rate": "1.0466e-04",
+ "loss": 0.5465,
+ "slid_loss": 0.5527,
+ "step": 4556,
+ "time": 13.19
+ },
+ {
+ "epoch": 4.38,
+ "learning_rate": "1.0465e-04",
+ "loss": 0.5244,
+ "slid_loss": 0.5514,
+ "step": 4557,
+ "time": 13.64
+ },
+ {
+ "epoch": 4.38,
+ "learning_rate": "1.0463e-04",
+ "loss": 0.5232,
+ "slid_loss": 0.5512,
+ "step": 4558,
+ "time": 12.91
+ },
+ {
+ "epoch": 4.38,
+ "learning_rate": "1.0462e-04",
+ "loss": 0.4879,
+ "slid_loss": 0.5514,
+ "step": 4559,
+ "time": 13.85
+ },
+ {
+ "epoch": 4.38,
+ "learning_rate": "1.0461e-04",
+ "loss": 0.5309,
+ "slid_loss": 0.5511,
+ "step": 4560,
+ "time": 12.25
+ },
+ {
+ "epoch": 4.38,
+ "learning_rate": "1.0459e-04",
+ "loss": 0.5395,
+ "slid_loss": 0.5519,
+ "step": 4561,
+ "time": 12.24
+ },
+ {
+ "epoch": 4.38,
+ "learning_rate": "1.0458e-04",
+ "loss": 0.561,
+ "slid_loss": 0.5513,
+ "step": 4562,
+ "time": 12.85
+ },
+ {
+ "epoch": 4.38,
+ "learning_rate": "1.0456e-04",
+ "loss": 0.5507,
+ "slid_loss": 0.5514,
+ "step": 4563,
+ "time": 12.94
+ },
+ {
+ "epoch": 4.38,
+ "learning_rate": "1.0455e-04",
+ "loss": 0.5411,
+ "slid_loss": 0.5528,
+ "step": 4564,
+ "time": 13.06
+ },
+ {
+ "epoch": 4.39,
+ "learning_rate": "1.0454e-04",
+ "loss": 0.5512,
+ "slid_loss": 0.5536,
+ "step": 4565,
+ "time": 12.25
+ },
+ {
+ "epoch": 4.39,
+ "learning_rate": "1.0452e-04",
+ "loss": 0.5925,
+ "slid_loss": 0.5541,
+ "step": 4566,
+ "time": 13.25
+ },
+ {
+ "epoch": 4.39,
+ "learning_rate": "1.0451e-04",
+ "loss": 0.627,
+ "slid_loss": 0.5548,
+ "step": 4567,
+ "time": 13.62
+ },
+ {
+ "epoch": 4.39,
+ "learning_rate": "1.0449e-04",
+ "loss": 0.5893,
+ "slid_loss": 0.5555,
+ "step": 4568,
+ "time": 13.74
+ },
+ {
+ "epoch": 4.39,
+ "learning_rate": "1.0448e-04",
+ "loss": 0.5069,
+ "slid_loss": 0.5545,
+ "step": 4569,
+ "time": 11.82
+ },
+ {
+ "epoch": 4.39,
+ "learning_rate": "1.0447e-04",
+ "loss": 0.4982,
+ "slid_loss": 0.553,
+ "step": 4570,
+ "time": 13.31
+ },
+ {
+ "epoch": 4.39,
+ "learning_rate": "1.0445e-04",
+ "loss": 0.5289,
+ "slid_loss": 0.5531,
+ "step": 4571,
+ "time": 13.97
+ },
+ {
+ "epoch": 4.39,
+ "learning_rate": "1.0444e-04",
+ "loss": 0.527,
+ "slid_loss": 0.5533,
+ "step": 4572,
+ "time": 13.19
+ },
+ {
+ "epoch": 4.39,
+ "learning_rate": "1.0443e-04",
+ "loss": 0.5932,
+ "slid_loss": 0.5533,
+ "step": 4573,
+ "time": 11.97
+ },
+ {
+ "epoch": 4.39,
+ "learning_rate": "1.0441e-04",
+ "loss": 0.6591,
+ "slid_loss": 0.554,
+ "step": 4574,
+ "time": 12.57
+ },
+ {
+ "epoch": 4.39,
+ "learning_rate": "1.0440e-04",
+ "loss": 0.5524,
+ "slid_loss": 0.5535,
+ "step": 4575,
+ "time": 13.04
+ },
+ {
+ "epoch": 4.4,
+ "learning_rate": "1.0438e-04",
+ "loss": 0.5381,
+ "slid_loss": 0.5529,
+ "step": 4576,
+ "time": 13.71
+ },
+ {
+ "epoch": 4.4,
+ "learning_rate": "1.0437e-04",
+ "loss": 0.5816,
+ "slid_loss": 0.5532,
+ "step": 4577,
+ "time": 11.91
+ },
+ {
+ "epoch": 4.4,
+ "learning_rate": "1.0436e-04",
+ "loss": 0.5995,
+ "slid_loss": 0.5526,
+ "step": 4578,
+ "time": 12.92
+ },
+ {
+ "epoch": 4.4,
+ "learning_rate": "1.0434e-04",
+ "loss": 0.5709,
+ "slid_loss": 0.5523,
+ "step": 4579,
+ "time": 11.89
+ },
+ {
+ "epoch": 4.4,
+ "learning_rate": "1.0433e-04",
+ "loss": 0.4961,
+ "slid_loss": 0.5524,
+ "step": 4580,
+ "time": 13.33
+ },
+ {
+ "epoch": 4.4,
+ "learning_rate": "1.0432e-04",
+ "loss": 0.6488,
+ "slid_loss": 0.553,
+ "step": 4581,
+ "time": 14.17
+ },
+ {
+ "epoch": 4.4,
+ "learning_rate": "1.0430e-04",
+ "loss": 0.5538,
+ "slid_loss": 0.5528,
+ "step": 4582,
+ "time": 13.34
+ },
+ {
+ "epoch": 4.4,
+ "learning_rate": "1.0429e-04",
+ "loss": 0.4702,
+ "slid_loss": 0.5528,
+ "step": 4583,
+ "time": 12.6
+ },
+ {
+ "epoch": 4.4,
+ "learning_rate": "1.0427e-04",
+ "loss": 0.5966,
+ "slid_loss": 0.553,
+ "step": 4584,
+ "time": 13.4
+ },
+ {
+ "epoch": 4.4,
+ "learning_rate": "1.0426e-04",
+ "loss": 0.4817,
+ "slid_loss": 0.5529,
+ "step": 4585,
+ "time": 11.86
+ },
+ {
+ "epoch": 4.41,
+ "learning_rate": "1.0425e-04",
+ "loss": 0.6157,
+ "slid_loss": 0.554,
+ "step": 4586,
+ "time": 12.25
+ },
+ {
+ "epoch": 4.41,
+ "learning_rate": "1.0423e-04",
+ "loss": 0.4919,
+ "slid_loss": 0.5542,
+ "step": 4587,
+ "time": 13.8
+ },
+ {
+ "epoch": 4.41,
+ "learning_rate": "1.0422e-04",
+ "loss": 0.5944,
+ "slid_loss": 0.5545,
+ "step": 4588,
+ "time": 14.07
+ },
+ {
+ "epoch": 4.41,
+ "learning_rate": "1.0421e-04",
+ "loss": 0.5339,
+ "slid_loss": 0.5546,
+ "step": 4589,
+ "time": 13.56
+ },
+ {
+ "epoch": 4.41,
+ "learning_rate": "1.0419e-04",
+ "loss": 0.5287,
+ "slid_loss": 0.5548,
+ "step": 4590,
+ "time": 13.37
+ },
+ {
+ "epoch": 4.41,
+ "learning_rate": "1.0418e-04",
+ "loss": 0.4689,
+ "slid_loss": 0.554,
+ "step": 4591,
+ "time": 11.5
+ },
+ {
+ "epoch": 4.41,
+ "learning_rate": "1.0417e-04",
+ "loss": 0.5358,
+ "slid_loss": 0.5542,
+ "step": 4592,
+ "time": 12.17
+ },
+ {
+ "epoch": 4.41,
+ "learning_rate": "1.0415e-04",
+ "loss": 0.5776,
+ "slid_loss": 0.5549,
+ "step": 4593,
+ "time": 11.31
+ },
+ {
+ "epoch": 4.41,
+ "learning_rate": "1.0414e-04",
+ "loss": 0.5559,
+ "slid_loss": 0.5544,
+ "step": 4594,
+ "time": 13.2
+ },
+ {
+ "epoch": 4.41,
+ "learning_rate": "1.0413e-04",
+ "loss": 0.4763,
+ "slid_loss": 0.5539,
+ "step": 4595,
+ "time": 12.98
+ },
+ {
+ "epoch": 4.41,
+ "learning_rate": "1.0411e-04",
+ "loss": 0.5465,
+ "slid_loss": 0.5534,
+ "step": 4596,
+ "time": 12.86
+ },
+ {
+ "epoch": 4.42,
+ "learning_rate": "1.0410e-04",
+ "loss": 0.5669,
+ "slid_loss": 0.5532,
+ "step": 4597,
+ "time": 13.77
+ },
+ {
+ "epoch": 4.42,
+ "learning_rate": "1.0409e-04",
+ "loss": 0.5102,
+ "slid_loss": 0.5517,
+ "step": 4598,
+ "time": 13.76
+ },
+ {
+ "epoch": 4.42,
+ "learning_rate": "1.0407e-04",
+ "loss": 0.5412,
+ "slid_loss": 0.552,
+ "step": 4599,
+ "time": 12.95
+ },
+ {
+ "epoch": 4.42,
+ "learning_rate": "1.0406e-04",
+ "loss": 0.4852,
+ "slid_loss": 0.5514,
+ "step": 4600,
+ "time": 11.28
+ },
+ {
+ "epoch": 4.42,
+ "learning_rate": "1.0405e-04",
+ "loss": 0.5492,
+ "slid_loss": 0.5517,
+ "step": 4601,
+ "time": 13.69
+ },
+ {
+ "epoch": 4.42,
+ "learning_rate": "1.0403e-04",
+ "loss": 0.4833,
+ "slid_loss": 0.5506,
+ "step": 4602,
+ "time": 12.83
+ },
+ {
+ "epoch": 4.42,
+ "learning_rate": "1.0402e-04",
+ "loss": 0.6499,
+ "slid_loss": 0.5516,
+ "step": 4603,
+ "time": 13.69
+ },
+ {
+ "epoch": 4.42,
+ "learning_rate": "1.0401e-04",
+ "loss": 0.4999,
+ "slid_loss": 0.5505,
+ "step": 4604,
+ "time": 13.63
+ },
+ {
+ "epoch": 4.42,
+ "learning_rate": "1.0399e-04",
+ "loss": 0.5291,
+ "slid_loss": 0.5505,
+ "step": 4605,
+ "time": 12.35
+ },
+ {
+ "epoch": 4.42,
+ "learning_rate": "1.0398e-04",
+ "loss": 0.5613,
+ "slid_loss": 0.55,
+ "step": 4606,
+ "time": 12.02
+ },
+ {
+ "epoch": 4.43,
+ "learning_rate": "1.0397e-04",
+ "loss": 0.6657,
+ "slid_loss": 0.552,
+ "step": 4607,
+ "time": 14.47
+ },
+ {
+ "epoch": 4.43,
+ "learning_rate": "1.0395e-04",
+ "loss": 0.6183,
+ "slid_loss": 0.552,
+ "step": 4608,
+ "time": 12.57
+ },
+ {
+ "epoch": 4.43,
+ "learning_rate": "1.0394e-04",
+ "loss": 0.5911,
+ "slid_loss": 0.5535,
+ "step": 4609,
+ "time": 13.9
+ },
+ {
+ "epoch": 4.43,
+ "learning_rate": "1.0393e-04",
+ "loss": 0.589,
+ "slid_loss": 0.5539,
+ "step": 4610,
+ "time": 13.02
+ },
+ {
+ "epoch": 4.43,
+ "learning_rate": "1.0392e-04",
+ "loss": 0.5499,
+ "slid_loss": 0.5545,
+ "step": 4611,
+ "time": 12.81
+ },
+ {
+ "epoch": 4.43,
+ "learning_rate": "1.0390e-04",
+ "loss": 0.5483,
+ "slid_loss": 0.5543,
+ "step": 4612,
+ "time": 12.98
+ },
+ {
+ "epoch": 4.43,
+ "learning_rate": "1.0389e-04",
+ "loss": 0.5976,
+ "slid_loss": 0.5545,
+ "step": 4613,
+ "time": 13.44
+ },
+ {
+ "epoch": 4.43,
+ "learning_rate": "1.0388e-04",
+ "loss": 0.6254,
+ "slid_loss": 0.5553,
+ "step": 4614,
+ "time": 13.57
+ },
+ {
+ "epoch": 4.43,
+ "learning_rate": "1.0386e-04",
+ "loss": 0.6337,
+ "slid_loss": 0.5551,
+ "step": 4615,
+ "time": 12.73
+ },
+ {
+ "epoch": 4.43,
+ "learning_rate": "1.0385e-04",
+ "loss": 0.5324,
+ "slid_loss": 0.5548,
+ "step": 4616,
+ "time": 13.94
+ },
+ {
+ "epoch": 4.44,
+ "learning_rate": "1.0384e-04",
+ "loss": 0.5672,
+ "slid_loss": 0.5551,
+ "step": 4617,
+ "time": 14.06
+ },
+ {
+ "epoch": 4.44,
+ "learning_rate": "1.0383e-04",
+ "loss": 0.5773,
+ "slid_loss": 0.5552,
+ "step": 4618,
+ "time": 12.81
+ },
+ {
+ "epoch": 4.44,
+ "learning_rate": "1.0381e-04",
+ "loss": 0.5793,
+ "slid_loss": 0.5548,
+ "step": 4619,
+ "time": 11.82
+ },
+ {
+ "epoch": 4.44,
+ "learning_rate": "1.0380e-04",
+ "loss": 0.5341,
+ "slid_loss": 0.5543,
+ "step": 4620,
+ "time": 13.12
+ },
+ {
+ "epoch": 4.44,
+ "learning_rate": "1.0379e-04",
+ "loss": 0.5612,
+ "slid_loss": 0.5542,
+ "step": 4621,
+ "time": 13.84
+ },
+ {
+ "epoch": 4.44,
+ "learning_rate": "1.0377e-04",
+ "loss": 0.4973,
+ "slid_loss": 0.5537,
+ "step": 4622,
+ "time": 14.14
+ },
+ {
+ "epoch": 4.44,
+ "learning_rate": "1.0376e-04",
+ "loss": 0.5334,
+ "slid_loss": 0.5542,
+ "step": 4623,
+ "time": 10.62
+ },
+ {
+ "epoch": 4.44,
+ "learning_rate": "1.0375e-04",
+ "loss": 0.5235,
+ "slid_loss": 0.5529,
+ "step": 4624,
+ "time": 13.42
+ },
+ {
+ "epoch": 4.44,
+ "learning_rate": "1.0374e-04",
+ "loss": 0.6071,
+ "slid_loss": 0.5536,
+ "step": 4625,
+ "time": 13.35
+ },
+ {
+ "epoch": 4.44,
+ "learning_rate": "1.0372e-04",
+ "loss": 0.4875,
+ "slid_loss": 0.5528,
+ "step": 4626,
+ "time": 13.48
+ },
+ {
+ "epoch": 4.44,
+ "learning_rate": "1.0371e-04",
+ "loss": 0.5622,
+ "slid_loss": 0.5532,
+ "step": 4627,
+ "time": 11.87
+ },
+ {
+ "epoch": 4.45,
+ "learning_rate": "1.0370e-04",
+ "loss": 0.5594,
+ "slid_loss": 0.5535,
+ "step": 4628,
+ "time": 13.68
+ },
+ {
+ "epoch": 4.45,
+ "learning_rate": "1.0369e-04",
+ "loss": 0.5694,
+ "slid_loss": 0.5525,
+ "step": 4629,
+ "time": 13.04
+ },
+ {
+ "epoch": 4.45,
+ "learning_rate": "1.0367e-04",
+ "loss": 0.5362,
+ "slid_loss": 0.5526,
+ "step": 4630,
+ "time": 14.1
+ },
+ {
+ "epoch": 4.45,
+ "learning_rate": "1.0366e-04",
+ "loss": 0.5336,
+ "slid_loss": 0.5522,
+ "step": 4631,
+ "time": 13.38
+ },
+ {
+ "epoch": 4.45,
+ "learning_rate": "1.0365e-04",
+ "loss": 0.5767,
+ "slid_loss": 0.5524,
+ "step": 4632,
+ "time": 13.89
+ },
+ {
+ "epoch": 4.45,
+ "learning_rate": "1.0363e-04",
+ "loss": 0.6667,
+ "slid_loss": 0.5533,
+ "step": 4633,
+ "time": 12.51
+ },
+ {
+ "epoch": 4.45,
+ "learning_rate": "1.0362e-04",
+ "loss": 0.4877,
+ "slid_loss": 0.5528,
+ "step": 4634,
+ "time": 10.89
+ },
+ {
+ "epoch": 4.45,
+ "learning_rate": "1.0361e-04",
+ "loss": 0.6421,
+ "slid_loss": 0.5538,
+ "step": 4635,
+ "time": 14.21
+ },
+ {
+ "epoch": 4.45,
+ "learning_rate": "1.0360e-04",
+ "loss": 0.4801,
+ "slid_loss": 0.5522,
+ "step": 4636,
+ "time": 14.13
+ },
+ {
+ "epoch": 4.45,
+ "learning_rate": "1.0358e-04",
+ "loss": 0.5815,
+ "slid_loss": 0.5526,
+ "step": 4637,
+ "time": 13.63
+ },
+ {
+ "epoch": 4.46,
+ "learning_rate": "1.0357e-04",
+ "loss": 0.6085,
+ "slid_loss": 0.553,
+ "step": 4638,
+ "time": 13.43
+ },
+ {
+ "epoch": 4.46,
+ "learning_rate": "1.0356e-04",
+ "loss": 0.5816,
+ "slid_loss": 0.554,
+ "step": 4639,
+ "time": 12.96
+ },
+ {
+ "epoch": 4.46,
+ "learning_rate": "1.0355e-04",
+ "loss": 0.6063,
+ "slid_loss": 0.5554,
+ "step": 4640,
+ "time": 13.35
+ },
+ {
+ "epoch": 4.46,
+ "learning_rate": "1.0353e-04",
+ "loss": 0.5629,
+ "slid_loss": 0.5558,
+ "step": 4641,
+ "time": 13.23
+ },
+ {
+ "epoch": 4.46,
+ "learning_rate": "1.0352e-04",
+ "loss": 0.5163,
+ "slid_loss": 0.5549,
+ "step": 4642,
+ "time": 12.78
+ },
+ {
+ "epoch": 4.46,
+ "learning_rate": "1.0351e-04",
+ "loss": 0.5287,
+ "slid_loss": 0.5542,
+ "step": 4643,
+ "time": 13.29
+ },
+ {
+ "epoch": 4.46,
+ "learning_rate": "1.0350e-04",
+ "loss": 0.4713,
+ "slid_loss": 0.5534,
+ "step": 4644,
+ "time": 14.05
+ },
+ {
+ "epoch": 4.46,
+ "learning_rate": "1.0349e-04",
+ "loss": 0.5099,
+ "slid_loss": 0.5531,
+ "step": 4645,
+ "time": 13.78
+ },
+ {
+ "epoch": 4.46,
+ "learning_rate": "1.0347e-04",
+ "loss": 0.5659,
+ "slid_loss": 0.5531,
+ "step": 4646,
+ "time": 11.35
+ },
+ {
+ "epoch": 4.46,
+ "learning_rate": "1.0346e-04",
+ "loss": 0.5787,
+ "slid_loss": 0.5539,
+ "step": 4647,
+ "time": 12.64
+ },
+ {
+ "epoch": 4.46,
+ "learning_rate": "1.0345e-04",
+ "loss": 0.418,
+ "slid_loss": 0.5532,
+ "step": 4648,
+ "time": 12.93
+ },
+ {
+ "epoch": 4.47,
+ "learning_rate": "1.0344e-04",
+ "loss": 0.4902,
+ "slid_loss": 0.5523,
+ "step": 4649,
+ "time": 13.84
+ },
+ {
+ "epoch": 4.47,
+ "learning_rate": "1.0342e-04",
+ "loss": 0.5935,
+ "slid_loss": 0.553,
+ "step": 4650,
+ "time": 11.42
+ },
+ {
+ "epoch": 4.47,
+ "learning_rate": "1.0341e-04",
+ "loss": 0.6261,
+ "slid_loss": 0.5536,
+ "step": 4651,
+ "time": 14.12
+ },
+ {
+ "epoch": 4.47,
+ "learning_rate": "1.0340e-04",
+ "loss": 0.5392,
+ "slid_loss": 0.5535,
+ "step": 4652,
+ "time": 13.34
+ },
+ {
+ "epoch": 4.47,
+ "learning_rate": "1.0339e-04",
+ "loss": 0.5094,
+ "slid_loss": 0.553,
+ "step": 4653,
+ "time": 13.3
+ },
+ {
+ "epoch": 4.47,
+ "learning_rate": "1.0338e-04",
+ "loss": 0.5454,
+ "slid_loss": 0.5525,
+ "step": 4654,
+ "time": 13.03
+ },
+ {
+ "epoch": 4.47,
+ "learning_rate": "1.0336e-04",
+ "loss": 0.5727,
+ "slid_loss": 0.5531,
+ "step": 4655,
+ "time": 11.03
+ },
+ {
+ "epoch": 4.47,
+ "learning_rate": "1.0335e-04",
+ "loss": 0.5821,
+ "slid_loss": 0.5534,
+ "step": 4656,
+ "time": 13.36
+ },
+ {
+ "epoch": 4.47,
+ "learning_rate": "1.0334e-04",
+ "loss": 0.5104,
+ "slid_loss": 0.5533,
+ "step": 4657,
+ "time": 13.58
+ },
+ {
+ "epoch": 4.47,
+ "learning_rate": "1.0333e-04",
+ "loss": 0.6151,
+ "slid_loss": 0.5542,
+ "step": 4658,
+ "time": 13.33
+ },
+ {
+ "epoch": 4.48,
+ "learning_rate": "1.0332e-04",
+ "loss": 0.4998,
+ "slid_loss": 0.5543,
+ "step": 4659,
+ "time": 12.87
+ },
+ {
+ "epoch": 4.48,
+ "learning_rate": "1.0330e-04",
+ "loss": 0.5408,
+ "slid_loss": 0.5544,
+ "step": 4660,
+ "time": 13.35
+ },
+ {
+ "epoch": 4.48,
+ "learning_rate": "1.0329e-04",
+ "loss": 0.523,
+ "slid_loss": 0.5543,
+ "step": 4661,
+ "time": 14.2
+ },
+ {
+ "epoch": 4.48,
+ "learning_rate": "1.0328e-04",
+ "loss": 0.5254,
+ "slid_loss": 0.5539,
+ "step": 4662,
+ "time": 12.9
+ },
+ {
+ "epoch": 4.48,
+ "learning_rate": "1.0327e-04",
+ "loss": 0.4647,
+ "slid_loss": 0.5531,
+ "step": 4663,
+ "time": 13.79
+ },
+ {
+ "epoch": 4.48,
+ "learning_rate": "1.0326e-04",
+ "loss": 0.5454,
+ "slid_loss": 0.5531,
+ "step": 4664,
+ "time": 13.39
+ },
+ {
+ "epoch": 4.48,
+ "learning_rate": "1.0324e-04",
+ "loss": 0.4749,
+ "slid_loss": 0.5523,
+ "step": 4665,
+ "time": 13.58
+ },
+ {
+ "epoch": 4.48,
+ "learning_rate": "1.0323e-04",
+ "loss": 0.5637,
+ "slid_loss": 0.5521,
+ "step": 4666,
+ "time": 13.44
+ },
+ {
+ "epoch": 4.48,
+ "learning_rate": "1.0322e-04",
+ "loss": 0.5824,
+ "slid_loss": 0.5516,
+ "step": 4667,
+ "time": 12.88
+ },
+ {
+ "epoch": 4.48,
+ "learning_rate": "1.0321e-04",
+ "loss": 0.5336,
+ "slid_loss": 0.5511,
+ "step": 4668,
+ "time": 13.73
+ },
+ {
+ "epoch": 4.49,
+ "learning_rate": "1.0320e-04",
+ "loss": 0.5507,
+ "slid_loss": 0.5515,
+ "step": 4669,
+ "time": 12.87
+ },
+ {
+ "epoch": 4.49,
+ "learning_rate": "1.0318e-04",
+ "loss": 0.5363,
+ "slid_loss": 0.5519,
+ "step": 4670,
+ "time": 13.18
+ },
+ {
+ "epoch": 4.49,
+ "learning_rate": "1.0317e-04",
+ "loss": 0.5987,
+ "slid_loss": 0.5526,
+ "step": 4671,
+ "time": 13.45
+ },
+ {
+ "epoch": 4.49,
+ "learning_rate": "1.0316e-04",
+ "loss": 0.4834,
+ "slid_loss": 0.5521,
+ "step": 4672,
+ "time": 13.96
+ },
+ {
+ "epoch": 4.49,
+ "learning_rate": "1.0315e-04",
+ "loss": 0.4996,
+ "slid_loss": 0.5512,
+ "step": 4673,
+ "time": 12.85
+ },
+ {
+ "epoch": 4.49,
+ "learning_rate": "1.0314e-04",
+ "loss": 0.5675,
+ "slid_loss": 0.5503,
+ "step": 4674,
+ "time": 12.98
+ },
+ {
+ "epoch": 4.49,
+ "learning_rate": "1.0313e-04",
+ "loss": 0.5412,
+ "slid_loss": 0.5502,
+ "step": 4675,
+ "time": 13.3
+ },
+ {
+ "epoch": 4.49,
+ "learning_rate": "1.0311e-04",
+ "loss": 0.4883,
+ "slid_loss": 0.5497,
+ "step": 4676,
+ "time": 12.04
+ },
+ {
+ "epoch": 4.49,
+ "learning_rate": "1.0310e-04",
+ "loss": 0.5627,
+ "slid_loss": 0.5495,
+ "step": 4677,
+ "time": 11.45
+ },
+ {
+ "epoch": 4.49,
+ "learning_rate": "1.0309e-04",
+ "loss": 0.5914,
+ "slid_loss": 0.5494,
+ "step": 4678,
+ "time": 11.44
+ },
+ {
+ "epoch": 4.49,
+ "learning_rate": "1.0308e-04",
+ "loss": 0.5366,
+ "slid_loss": 0.5491,
+ "step": 4679,
+ "time": 12.81
+ },
+ {
+ "epoch": 4.5,
+ "learning_rate": "1.0307e-04",
+ "loss": 0.5133,
+ "slid_loss": 0.5492,
+ "step": 4680,
+ "time": 13.0
+ },
+ {
+ "epoch": 4.5,
+ "learning_rate": "1.0306e-04",
+ "loss": 0.5782,
+ "slid_loss": 0.5485,
+ "step": 4681,
+ "time": 12.89
+ },
+ {
+ "epoch": 4.5,
+ "learning_rate": "1.0304e-04",
+ "loss": 0.4773,
+ "slid_loss": 0.5478,
+ "step": 4682,
+ "time": 13.38
+ },
+ {
+ "epoch": 4.5,
+ "learning_rate": "1.0303e-04",
+ "loss": 0.4921,
+ "slid_loss": 0.548,
+ "step": 4683,
+ "time": 11.96
+ },
+ {
+ "epoch": 4.5,
+ "learning_rate": "1.0302e-04",
+ "loss": 0.5554,
+ "slid_loss": 0.5476,
+ "step": 4684,
+ "time": 12.97
+ },
+ {
+ "epoch": 4.5,
+ "learning_rate": "1.0301e-04",
+ "loss": 0.6223,
+ "slid_loss": 0.549,
+ "step": 4685,
+ "time": 13.52
+ },
+ {
+ "epoch": 4.5,
+ "learning_rate": "1.0300e-04",
+ "loss": 0.5961,
+ "slid_loss": 0.5488,
+ "step": 4686,
+ "time": 13.85
+ },
+ {
+ "epoch": 4.5,
+ "learning_rate": "1.0299e-04",
+ "loss": 0.5749,
+ "slid_loss": 0.5496,
+ "step": 4687,
+ "time": 13.62
+ },
+ {
+ "epoch": 4.5,
+ "learning_rate": "1.0298e-04",
+ "loss": 0.517,
+ "slid_loss": 0.5488,
+ "step": 4688,
+ "time": 12.07
+ },
+ {
+ "epoch": 4.5,
+ "learning_rate": "1.0296e-04",
+ "loss": 0.5318,
+ "slid_loss": 0.5488,
+ "step": 4689,
+ "time": 12.23
+ },
+ {
+ "epoch": 4.51,
+ "learning_rate": "1.0295e-04",
+ "loss": 0.576,
+ "slid_loss": 0.5493,
+ "step": 4690,
+ "time": 13.16
+ },
+ {
+ "epoch": 4.51,
+ "learning_rate": "1.0294e-04",
+ "loss": 0.5547,
+ "slid_loss": 0.5501,
+ "step": 4691,
+ "time": 12.62
+ },
+ {
+ "epoch": 4.51,
+ "learning_rate": "1.0293e-04",
+ "loss": 0.5048,
+ "slid_loss": 0.5498,
+ "step": 4692,
+ "time": 13.95
+ },
+ {
+ "epoch": 4.51,
+ "learning_rate": "1.0292e-04",
+ "loss": 0.5649,
+ "slid_loss": 0.5497,
+ "step": 4693,
+ "time": 13.2
+ },
+ {
+ "epoch": 4.51,
+ "learning_rate": "1.0291e-04",
+ "loss": 0.5191,
+ "slid_loss": 0.5493,
+ "step": 4694,
+ "time": 12.93
+ },
+ {
+ "epoch": 4.51,
+ "learning_rate": "1.0290e-04",
+ "loss": 0.5293,
+ "slid_loss": 0.5499,
+ "step": 4695,
+ "time": 12.88
+ },
+ {
+ "epoch": 4.51,
+ "learning_rate": "1.0289e-04",
+ "loss": 0.5833,
+ "slid_loss": 0.5502,
+ "step": 4696,
+ "time": 14.23
+ },
+ {
+ "epoch": 4.51,
+ "learning_rate": "1.0287e-04",
+ "loss": 0.5538,
+ "slid_loss": 0.5501,
+ "step": 4697,
+ "time": 13.23
+ },
+ {
+ "epoch": 4.51,
+ "learning_rate": "1.0286e-04",
+ "loss": 0.5292,
+ "slid_loss": 0.5503,
+ "step": 4698,
+ "time": 13.27
+ },
+ {
+ "epoch": 4.51,
+ "learning_rate": "1.0285e-04",
+ "loss": 0.5432,
+ "slid_loss": 0.5503,
+ "step": 4699,
+ "time": 13.34
+ },
+ {
+ "epoch": 4.51,
+ "learning_rate": "1.0284e-04",
+ "loss": 0.5262,
+ "slid_loss": 0.5507,
+ "step": 4700,
+ "time": 12.86
+ },
+ {
+ "epoch": 4.52,
+ "learning_rate": "1.0283e-04",
+ "loss": 0.522,
+ "slid_loss": 0.5505,
+ "step": 4701,
+ "time": 14.95
+ },
+ {
+ "epoch": 4.52,
+ "learning_rate": "1.0282e-04",
+ "loss": 0.5277,
+ "slid_loss": 0.5509,
+ "step": 4702,
+ "time": 10.61
+ },
+ {
+ "epoch": 4.52,
+ "learning_rate": "1.0281e-04",
+ "loss": 0.4832,
+ "slid_loss": 0.5492,
+ "step": 4703,
+ "time": 13.81
+ },
+ {
+ "epoch": 4.52,
+ "learning_rate": "1.0280e-04",
+ "loss": 0.569,
+ "slid_loss": 0.5499,
+ "step": 4704,
+ "time": 13.12
+ },
+ {
+ "epoch": 4.52,
+ "learning_rate": "1.0279e-04",
+ "loss": 0.5246,
+ "slid_loss": 0.5499,
+ "step": 4705,
+ "time": 13.4
+ },
+ {
+ "epoch": 4.52,
+ "learning_rate": "1.0277e-04",
+ "loss": 0.5187,
+ "slid_loss": 0.5494,
+ "step": 4706,
+ "time": 13.64
+ },
+ {
+ "epoch": 4.52,
+ "learning_rate": "1.0276e-04",
+ "loss": 0.483,
+ "slid_loss": 0.5476,
+ "step": 4707,
+ "time": 12.29
+ },
+ {
+ "epoch": 4.52,
+ "learning_rate": "1.0275e-04",
+ "loss": 0.5577,
+ "slid_loss": 0.547,
+ "step": 4708,
+ "time": 13.52
+ },
+ {
+ "epoch": 4.52,
+ "learning_rate": "1.0274e-04",
+ "loss": 0.5263,
+ "slid_loss": 0.5464,
+ "step": 4709,
+ "time": 12.32
+ },
+ {
+ "epoch": 4.52,
+ "learning_rate": "1.0273e-04",
+ "loss": 0.55,
+ "slid_loss": 0.546,
+ "step": 4710,
+ "time": 13.81
+ },
+ {
+ "epoch": 4.53,
+ "learning_rate": "1.0272e-04",
+ "loss": 0.571,
+ "slid_loss": 0.5462,
+ "step": 4711,
+ "time": 11.53
+ },
+ {
+ "epoch": 4.53,
+ "learning_rate": "1.0271e-04",
+ "loss": 0.5602,
+ "slid_loss": 0.5463,
+ "step": 4712,
+ "time": 12.68
+ },
+ {
+ "epoch": 4.53,
+ "learning_rate": "1.0270e-04",
+ "loss": 0.5305,
+ "slid_loss": 0.5456,
+ "step": 4713,
+ "time": 13.27
+ },
+ {
+ "epoch": 4.53,
+ "learning_rate": "1.0269e-04",
+ "loss": 0.4363,
+ "slid_loss": 0.5437,
+ "step": 4714,
+ "time": 14.28
+ },
+ {
+ "epoch": 4.53,
+ "learning_rate": "1.0268e-04",
+ "loss": 0.5301,
+ "slid_loss": 0.5427,
+ "step": 4715,
+ "time": 13.79
+ },
+ {
+ "epoch": 4.53,
+ "learning_rate": "1.0267e-04",
+ "loss": 0.6129,
+ "slid_loss": 0.5435,
+ "step": 4716,
+ "time": 13.39
+ },
+ {
+ "epoch": 4.53,
+ "learning_rate": "1.0265e-04",
+ "loss": 0.53,
+ "slid_loss": 0.5431,
+ "step": 4717,
+ "time": 12.34
+ },
+ {
+ "epoch": 4.53,
+ "learning_rate": "1.0264e-04",
+ "loss": 0.5885,
+ "slid_loss": 0.5433,
+ "step": 4718,
+ "time": 12.38
+ },
+ {
+ "epoch": 4.53,
+ "learning_rate": "1.0263e-04",
+ "loss": 0.5413,
+ "slid_loss": 0.5429,
+ "step": 4719,
+ "time": 12.09
+ },
+ {
+ "epoch": 4.53,
+ "learning_rate": "1.0262e-04",
+ "loss": 0.596,
+ "slid_loss": 0.5435,
+ "step": 4720,
+ "time": 13.6
+ },
+ {
+ "epoch": 4.54,
+ "learning_rate": "1.0261e-04",
+ "loss": 0.513,
+ "slid_loss": 0.543,
+ "step": 4721,
+ "time": 12.93
+ },
+ {
+ "epoch": 4.54,
+ "learning_rate": "1.0260e-04",
+ "loss": 0.5262,
+ "slid_loss": 0.5433,
+ "step": 4722,
+ "time": 13.89
+ },
+ {
+ "epoch": 4.54,
+ "learning_rate": "1.0259e-04",
+ "loss": 0.5726,
+ "slid_loss": 0.5437,
+ "step": 4723,
+ "time": 13.66
+ },
+ {
+ "epoch": 4.54,
+ "learning_rate": "1.0258e-04",
+ "loss": 0.5686,
+ "slid_loss": 0.5441,
+ "step": 4724,
+ "time": 12.17
+ },
+ {
+ "epoch": 4.54,
+ "learning_rate": "1.0257e-04",
+ "loss": 0.5895,
+ "slid_loss": 0.544,
+ "step": 4725,
+ "time": 13.04
+ },
+ {
+ "epoch": 4.54,
+ "learning_rate": "1.0256e-04",
+ "loss": 0.5341,
+ "slid_loss": 0.5444,
+ "step": 4726,
+ "time": 14.04
+ },
+ {
+ "epoch": 4.54,
+ "learning_rate": "1.0255e-04",
+ "loss": 0.5201,
+ "slid_loss": 0.544,
+ "step": 4727,
+ "time": 13.33
+ },
+ {
+ "epoch": 4.54,
+ "learning_rate": "1.0254e-04",
+ "loss": 0.562,
+ "slid_loss": 0.544,
+ "step": 4728,
+ "time": 12.71
+ },
+ {
+ "epoch": 4.54,
+ "learning_rate": "1.0253e-04",
+ "loss": 0.573,
+ "slid_loss": 0.5441,
+ "step": 4729,
+ "time": 13.28
+ },
+ {
+ "epoch": 4.54,
+ "learning_rate": "1.0252e-04",
+ "loss": 0.4773,
+ "slid_loss": 0.5435,
+ "step": 4730,
+ "time": 11.68
+ },
+ {
+ "epoch": 4.54,
+ "learning_rate": "1.0251e-04",
+ "loss": 0.5289,
+ "slid_loss": 0.5434,
+ "step": 4731,
+ "time": 11.54
+ },
+ {
+ "epoch": 4.55,
+ "learning_rate": "1.0250e-04",
+ "loss": 0.545,
+ "slid_loss": 0.5431,
+ "step": 4732,
+ "time": 12.9
+ },
+ {
+ "epoch": 4.55,
+ "learning_rate": "1.0248e-04",
+ "loss": 0.5899,
+ "slid_loss": 0.5424,
+ "step": 4733,
+ "time": 13.25
+ },
+ {
+ "epoch": 4.55,
+ "learning_rate": "1.0247e-04",
+ "loss": 0.5469,
+ "slid_loss": 0.5429,
+ "step": 4734,
+ "time": 12.79
+ },
+ {
+ "epoch": 4.55,
+ "learning_rate": "1.0246e-04",
+ "loss": 0.5054,
+ "slid_loss": 0.5416,
+ "step": 4735,
+ "time": 13.75
+ },
+ {
+ "epoch": 4.55,
+ "learning_rate": "1.0245e-04",
+ "loss": 0.5483,
+ "slid_loss": 0.5423,
+ "step": 4736,
+ "time": 12.68
+ },
+ {
+ "epoch": 4.55,
+ "learning_rate": "1.0244e-04",
+ "loss": 0.5817,
+ "slid_loss": 0.5423,
+ "step": 4737,
+ "time": 14.05
+ },
+ {
+ "epoch": 4.55,
+ "learning_rate": "1.0243e-04",
+ "loss": 0.5196,
+ "slid_loss": 0.5414,
+ "step": 4738,
+ "time": 13.56
+ },
+ {
+ "epoch": 4.55,
+ "learning_rate": "1.0242e-04",
+ "loss": 0.5095,
+ "slid_loss": 0.5407,
+ "step": 4739,
+ "time": 12.04
+ },
+ {
+ "epoch": 4.55,
+ "learning_rate": "1.0241e-04",
+ "loss": 0.5494,
+ "slid_loss": 0.5401,
+ "step": 4740,
+ "time": 13.33
+ },
+ {
+ "epoch": 4.55,
+ "learning_rate": "1.0240e-04",
+ "loss": 0.5926,
+ "slid_loss": 0.5404,
+ "step": 4741,
+ "time": 12.97
+ },
+ {
+ "epoch": 4.56,
+ "learning_rate": "1.0239e-04",
+ "loss": 0.5782,
+ "slid_loss": 0.541,
+ "step": 4742,
+ "time": 13.0
+ },
+ {
+ "epoch": 4.56,
+ "learning_rate": "1.0238e-04",
+ "loss": 0.5186,
+ "slid_loss": 0.5409,
+ "step": 4743,
+ "time": 12.91
+ },
+ {
+ "epoch": 4.56,
+ "learning_rate": "1.0237e-04",
+ "loss": 0.5384,
+ "slid_loss": 0.5416,
+ "step": 4744,
+ "time": 11.89
+ },
+ {
+ "epoch": 4.56,
+ "learning_rate": "1.0236e-04",
+ "loss": 0.4831,
+ "slid_loss": 0.5413,
+ "step": 4745,
+ "time": 12.47
+ },
+ {
+ "epoch": 4.56,
+ "learning_rate": "1.0235e-04",
+ "loss": 0.5764,
+ "slid_loss": 0.5414,
+ "step": 4746,
+ "time": 12.98
+ },
+ {
+ "epoch": 4.56,
+ "learning_rate": "1.0234e-04",
+ "loss": 0.5058,
+ "slid_loss": 0.5407,
+ "step": 4747,
+ "time": 13.74
+ },
+ {
+ "epoch": 4.56,
+ "learning_rate": "1.0233e-04",
+ "loss": 0.5507,
+ "slid_loss": 0.542,
+ "step": 4748,
+ "time": 13.42
+ },
+ {
+ "epoch": 4.56,
+ "learning_rate": "1.0232e-04",
+ "loss": 0.535,
+ "slid_loss": 0.5425,
+ "step": 4749,
+ "time": 13.18
+ },
+ {
+ "epoch": 4.56,
+ "learning_rate": "1.0231e-04",
+ "loss": 0.5451,
+ "slid_loss": 0.542,
+ "step": 4750,
+ "time": 13.39
+ },
+ {
+ "epoch": 4.56,
+ "learning_rate": "1.0230e-04",
+ "loss": 0.6298,
+ "slid_loss": 0.542,
+ "step": 4751,
+ "time": 12.84
+ },
+ {
+ "epoch": 4.56,
+ "learning_rate": "1.0229e-04",
+ "loss": 0.5798,
+ "slid_loss": 0.5424,
+ "step": 4752,
+ "time": 12.83
+ },
+ {
+ "epoch": 4.57,
+ "learning_rate": "1.0228e-04",
+ "loss": 0.6316,
+ "slid_loss": 0.5436,
+ "step": 4753,
+ "time": 13.97
+ },
+ {
+ "epoch": 4.57,
+ "learning_rate": "1.0227e-04",
+ "loss": 0.619,
+ "slid_loss": 0.5444,
+ "step": 4754,
+ "time": 12.78
+ },
+ {
+ "epoch": 4.57,
+ "learning_rate": "1.0226e-04",
+ "loss": 0.5301,
+ "slid_loss": 0.5439,
+ "step": 4755,
+ "time": 13.45
+ },
+ {
+ "epoch": 4.57,
+ "learning_rate": "1.0225e-04",
+ "loss": 0.5216,
+ "slid_loss": 0.5433,
+ "step": 4756,
+ "time": 13.17
+ },
+ {
+ "epoch": 4.57,
+ "learning_rate": "1.0224e-04",
+ "loss": 0.5148,
+ "slid_loss": 0.5434,
+ "step": 4757,
+ "time": 13.2
+ },
+ {
+ "epoch": 4.57,
+ "learning_rate": "1.0223e-04",
+ "loss": 0.4781,
+ "slid_loss": 0.542,
+ "step": 4758,
+ "time": 13.42
+ },
+ {
+ "epoch": 4.57,
+ "learning_rate": "1.0222e-04",
+ "loss": 0.447,
+ "slid_loss": 0.5415,
+ "step": 4759,
+ "time": 13.24
+ },
+ {
+ "epoch": 4.57,
+ "learning_rate": "1.0221e-04",
+ "loss": 0.4941,
+ "slid_loss": 0.541,
+ "step": 4760,
+ "time": 12.86
+ },
+ {
+ "epoch": 4.57,
+ "learning_rate": "1.0220e-04",
+ "loss": 0.5129,
+ "slid_loss": 0.5409,
+ "step": 4761,
+ "time": 13.3
+ },
+ {
+ "epoch": 4.57,
+ "learning_rate": "1.0219e-04",
+ "loss": 0.5654,
+ "slid_loss": 0.5413,
+ "step": 4762,
+ "time": 11.38
+ },
+ {
+ "epoch": 4.58,
+ "learning_rate": "1.0218e-04",
+ "loss": 0.524,
+ "slid_loss": 0.5419,
+ "step": 4763,
+ "time": 13.47
+ },
+ {
+ "epoch": 4.58,
+ "learning_rate": "1.0217e-04",
+ "loss": 0.508,
+ "slid_loss": 0.5415,
+ "step": 4764,
+ "time": 13.3
+ },
+ {
+ "epoch": 4.58,
+ "learning_rate": "1.0216e-04",
+ "loss": 0.5876,
+ "slid_loss": 0.5427,
+ "step": 4765,
+ "time": 11.86
+ },
+ {
+ "epoch": 4.58,
+ "learning_rate": "1.0215e-04",
+ "loss": 0.5516,
+ "slid_loss": 0.5425,
+ "step": 4766,
+ "time": 13.41
+ },
+ {
+ "epoch": 4.58,
+ "learning_rate": "1.0214e-04",
+ "loss": 0.5895,
+ "slid_loss": 0.5426,
+ "step": 4767,
+ "time": 11.75
+ },
+ {
+ "epoch": 4.58,
+ "learning_rate": "1.0213e-04",
+ "loss": 0.6036,
+ "slid_loss": 0.5433,
+ "step": 4768,
+ "time": 12.22
+ },
+ {
+ "epoch": 4.58,
+ "learning_rate": "1.0212e-04",
+ "loss": 0.4842,
+ "slid_loss": 0.5427,
+ "step": 4769,
+ "time": 14.2
+ },
+ {
+ "epoch": 4.58,
+ "learning_rate": "1.0211e-04",
+ "loss": 0.5387,
+ "slid_loss": 0.5427,
+ "step": 4770,
+ "time": 12.76
+ },
+ {
+ "epoch": 4.58,
+ "learning_rate": "1.0210e-04",
+ "loss": 0.5536,
+ "slid_loss": 0.5422,
+ "step": 4771,
+ "time": 11.43
+ },
+ {
+ "epoch": 4.58,
+ "learning_rate": "1.0209e-04",
+ "loss": 0.5464,
+ "slid_loss": 0.5429,
+ "step": 4772,
+ "time": 13.4
+ },
+ {
+ "epoch": 4.59,
+ "learning_rate": "1.0208e-04",
+ "loss": 0.4983,
+ "slid_loss": 0.5428,
+ "step": 4773,
+ "time": 12.98
+ },
+ {
+ "epoch": 4.59,
+ "learning_rate": "1.0207e-04",
+ "loss": 0.5401,
+ "slid_loss": 0.5426,
+ "step": 4774,
+ "time": 13.93
+ },
+ {
+ "epoch": 4.59,
+ "learning_rate": "1.0207e-04",
+ "loss": 0.5177,
+ "slid_loss": 0.5423,
+ "step": 4775,
+ "time": 13.72
+ },
+ {
+ "epoch": 4.59,
+ "learning_rate": "1.0206e-04",
+ "loss": 0.5534,
+ "slid_loss": 0.543,
+ "step": 4776,
+ "time": 13.91
+ },
+ {
+ "epoch": 4.59,
+ "learning_rate": "1.0205e-04",
+ "loss": 0.4727,
+ "slid_loss": 0.5421,
+ "step": 4777,
+ "time": 12.35
+ },
+ {
+ "epoch": 4.59,
+ "learning_rate": "1.0204e-04",
+ "loss": 0.5425,
+ "slid_loss": 0.5416,
+ "step": 4778,
+ "time": 12.97
+ },
+ {
+ "epoch": 4.59,
+ "learning_rate": "1.0203e-04",
+ "loss": 0.5913,
+ "slid_loss": 0.5421,
+ "step": 4779,
+ "time": 14.16
+ },
+ {
+ "epoch": 4.59,
+ "learning_rate": "1.0202e-04",
+ "loss": 0.4712,
+ "slid_loss": 0.5417,
+ "step": 4780,
+ "time": 12.23
+ },
+ {
+ "epoch": 4.59,
+ "learning_rate": "1.0201e-04",
+ "loss": 0.5199,
+ "slid_loss": 0.5411,
+ "step": 4781,
+ "time": 14.38
+ },
+ {
+ "epoch": 4.59,
+ "learning_rate": "1.0200e-04",
+ "loss": 0.5233,
+ "slid_loss": 0.5416,
+ "step": 4782,
+ "time": 13.77
+ },
+ {
+ "epoch": 4.59,
+ "learning_rate": "1.0199e-04",
+ "loss": 0.5421,
+ "slid_loss": 0.5421,
+ "step": 4783,
+ "time": 13.19
+ },
+ {
+ "epoch": 4.6,
+ "learning_rate": "1.0198e-04",
+ "loss": 0.5507,
+ "slid_loss": 0.5421,
+ "step": 4784,
+ "time": 13.39
+ },
+ {
+ "epoch": 4.6,
+ "learning_rate": "1.0197e-04",
+ "loss": 0.5617,
+ "slid_loss": 0.5414,
+ "step": 4785,
+ "time": 14.0
+ },
+ {
+ "epoch": 4.6,
+ "learning_rate": "1.0196e-04",
+ "loss": 0.4796,
+ "slid_loss": 0.5403,
+ "step": 4786,
+ "time": 13.62
+ },
+ {
+ "epoch": 4.6,
+ "learning_rate": "1.0195e-04",
+ "loss": 0.4877,
+ "slid_loss": 0.5394,
+ "step": 4787,
+ "time": 13.93
+ },
+ {
+ "epoch": 4.6,
+ "learning_rate": "1.0194e-04",
+ "loss": 0.5647,
+ "slid_loss": 0.5399,
+ "step": 4788,
+ "time": 13.39
+ },
+ {
+ "epoch": 4.6,
+ "learning_rate": "1.0193e-04",
+ "loss": 0.4568,
+ "slid_loss": 0.5391,
+ "step": 4789,
+ "time": 13.87
+ },
+ {
+ "epoch": 4.6,
+ "learning_rate": "1.0192e-04",
+ "loss": 0.4782,
+ "slid_loss": 0.5382,
+ "step": 4790,
+ "time": 11.93
+ },
+ {
+ "epoch": 4.6,
+ "learning_rate": "1.0192e-04",
+ "loss": 0.5409,
+ "slid_loss": 0.538,
+ "step": 4791,
+ "time": 12.62
+ },
+ {
+ "epoch": 4.6,
+ "learning_rate": "1.0191e-04",
+ "loss": 0.5955,
+ "slid_loss": 0.5389,
+ "step": 4792,
+ "time": 11.76
+ },
+ {
+ "epoch": 4.6,
+ "learning_rate": "1.0190e-04",
+ "loss": 0.5565,
+ "slid_loss": 0.5388,
+ "step": 4793,
+ "time": 13.1
+ },
+ {
+ "epoch": 4.61,
+ "learning_rate": "1.0189e-04",
+ "loss": 0.4946,
+ "slid_loss": 0.5386,
+ "step": 4794,
+ "time": 13.62
+ },
+ {
+ "epoch": 4.61,
+ "learning_rate": "1.0188e-04",
+ "loss": 0.5959,
+ "slid_loss": 0.5393,
+ "step": 4795,
+ "time": 13.72
+ },
+ {
+ "epoch": 4.61,
+ "learning_rate": "1.0187e-04",
+ "loss": 0.4435,
+ "slid_loss": 0.5379,
+ "step": 4796,
+ "time": 13.64
+ },
+ {
+ "epoch": 4.61,
+ "learning_rate": "1.0186e-04",
+ "loss": 0.5816,
+ "slid_loss": 0.5381,
+ "step": 4797,
+ "time": 11.35
+ },
+ {
+ "epoch": 4.61,
+ "learning_rate": "1.0185e-04",
+ "loss": 0.5437,
+ "slid_loss": 0.5383,
+ "step": 4798,
+ "time": 13.96
+ },
+ {
+ "epoch": 4.61,
+ "learning_rate": "1.0184e-04",
+ "loss": 0.5581,
+ "slid_loss": 0.5384,
+ "step": 4799,
+ "time": 12.81
+ },
+ {
+ "epoch": 4.61,
+ "learning_rate": "1.0183e-04",
+ "loss": 0.564,
+ "slid_loss": 0.5388,
+ "step": 4800,
+ "time": 12.32
+ },
+ {
+ "epoch": 4.61,
+ "learning_rate": "1.0182e-04",
+ "loss": 0.5373,
+ "slid_loss": 0.539,
+ "step": 4801,
+ "time": 13.16
+ },
+ {
+ "epoch": 4.61,
+ "learning_rate": "1.0182e-04",
+ "loss": 0.5305,
+ "slid_loss": 0.539,
+ "step": 4802,
+ "time": 13.7
+ },
+ {
+ "epoch": 4.61,
+ "learning_rate": "1.0181e-04",
+ "loss": 0.4863,
+ "slid_loss": 0.539,
+ "step": 4803,
+ "time": 13.77
+ },
+ {
+ "epoch": 4.61,
+ "learning_rate": "1.0180e-04",
+ "loss": 0.5463,
+ "slid_loss": 0.5388,
+ "step": 4804,
+ "time": 11.69
+ },
+ {
+ "epoch": 4.62,
+ "learning_rate": "1.0179e-04",
+ "loss": 0.4792,
+ "slid_loss": 0.5383,
+ "step": 4805,
+ "time": 14.12
+ },
+ {
+ "epoch": 4.62,
+ "learning_rate": "1.0178e-04",
+ "loss": 0.4802,
+ "slid_loss": 0.538,
+ "step": 4806,
+ "time": 13.33
+ },
+ {
+ "epoch": 4.62,
+ "learning_rate": "1.0177e-04",
+ "loss": 0.5645,
+ "slid_loss": 0.5388,
+ "step": 4807,
+ "time": 13.32
+ },
+ {
+ "epoch": 4.62,
+ "learning_rate": "1.0176e-04",
+ "loss": 0.5218,
+ "slid_loss": 0.5384,
+ "step": 4808,
+ "time": 13.73
+ },
+ {
+ "epoch": 4.62,
+ "learning_rate": "1.0175e-04",
+ "loss": 0.5438,
+ "slid_loss": 0.5386,
+ "step": 4809,
+ "time": 11.07
+ },
+ {
+ "epoch": 4.62,
+ "learning_rate": "1.0174e-04",
+ "loss": 0.4951,
+ "slid_loss": 0.538,
+ "step": 4810,
+ "time": 13.02
+ },
+ {
+ "epoch": 4.62,
+ "learning_rate": "1.0174e-04",
+ "loss": 0.4749,
+ "slid_loss": 0.5371,
+ "step": 4811,
+ "time": 13.35
+ },
+ {
+ "epoch": 4.62,
+ "learning_rate": "1.0173e-04",
+ "loss": 0.4724,
+ "slid_loss": 0.5362,
+ "step": 4812,
+ "time": 12.27
+ },
+ {
+ "epoch": 4.62,
+ "learning_rate": "1.0172e-04",
+ "loss": 0.535,
+ "slid_loss": 0.5362,
+ "step": 4813,
+ "time": 11.1
+ },
+ {
+ "epoch": 4.62,
+ "learning_rate": "1.0171e-04",
+ "loss": 0.563,
+ "slid_loss": 0.5375,
+ "step": 4814,
+ "time": 12.89
+ },
+ {
+ "epoch": 4.63,
+ "learning_rate": "1.0170e-04",
+ "loss": 0.5637,
+ "slid_loss": 0.5379,
+ "step": 4815,
+ "time": 13.18
+ },
+ {
+ "epoch": 4.63,
+ "learning_rate": "1.0169e-04",
+ "loss": 0.5747,
+ "slid_loss": 0.5375,
+ "step": 4816,
+ "time": 13.72
+ },
+ {
+ "epoch": 4.63,
+ "learning_rate": "1.0168e-04",
+ "loss": 0.472,
+ "slid_loss": 0.5369,
+ "step": 4817,
+ "time": 13.28
+ },
+ {
+ "epoch": 4.63,
+ "learning_rate": "1.0167e-04",
+ "loss": 0.6013,
+ "slid_loss": 0.537,
+ "step": 4818,
+ "time": 13.83
+ },
+ {
+ "epoch": 4.63,
+ "learning_rate": "1.0167e-04",
+ "loss": 0.5446,
+ "slid_loss": 0.5371,
+ "step": 4819,
+ "time": 12.02
+ },
+ {
+ "epoch": 4.63,
+ "learning_rate": "1.0166e-04",
+ "loss": 0.5393,
+ "slid_loss": 0.5365,
+ "step": 4820,
+ "time": 13.69
+ },
+ {
+ "epoch": 4.63,
+ "learning_rate": "1.0165e-04",
+ "loss": 0.638,
+ "slid_loss": 0.5377,
+ "step": 4821,
+ "time": 13.4
+ },
+ {
+ "epoch": 4.63,
+ "learning_rate": "1.0164e-04",
+ "loss": 0.5387,
+ "slid_loss": 0.5379,
+ "step": 4822,
+ "time": 13.58
+ },
+ {
+ "epoch": 4.63,
+ "learning_rate": "1.0163e-04",
+ "loss": 0.5703,
+ "slid_loss": 0.5378,
+ "step": 4823,
+ "time": 12.88
+ },
+ {
+ "epoch": 4.63,
+ "learning_rate": "1.0162e-04",
+ "loss": 0.5315,
+ "slid_loss": 0.5375,
+ "step": 4824,
+ "time": 12.82
+ },
+ {
+ "epoch": 4.63,
+ "learning_rate": "1.0162e-04",
+ "loss": 0.6047,
+ "slid_loss": 0.5376,
+ "step": 4825,
+ "time": 11.26
+ },
+ {
+ "epoch": 4.64,
+ "learning_rate": "1.0161e-04",
+ "loss": 0.5411,
+ "slid_loss": 0.5377,
+ "step": 4826,
+ "time": 12.11
+ },
+ {
+ "epoch": 4.64,
+ "learning_rate": "1.0160e-04",
+ "loss": 0.5387,
+ "slid_loss": 0.5379,
+ "step": 4827,
+ "time": 13.34
+ },
+ {
+ "epoch": 4.64,
+ "learning_rate": "1.0159e-04",
+ "loss": 0.4582,
+ "slid_loss": 0.5368,
+ "step": 4828,
+ "time": 12.89
+ },
+ {
+ "epoch": 4.64,
+ "learning_rate": "1.0158e-04",
+ "loss": 0.4897,
+ "slid_loss": 0.536,
+ "step": 4829,
+ "time": 13.16
+ },
+ {
+ "epoch": 4.64,
+ "learning_rate": "1.0157e-04",
+ "loss": 0.5776,
+ "slid_loss": 0.537,
+ "step": 4830,
+ "time": 13.29
+ },
+ {
+ "epoch": 4.64,
+ "learning_rate": "1.0156e-04",
+ "loss": 0.5501,
+ "slid_loss": 0.5372,
+ "step": 4831,
+ "time": 13.51
+ },
+ {
+ "epoch": 4.64,
+ "learning_rate": "1.0156e-04",
+ "loss": 0.6115,
+ "slid_loss": 0.5379,
+ "step": 4832,
+ "time": 11.29
+ },
+ {
+ "epoch": 4.64,
+ "learning_rate": "1.0155e-04",
+ "loss": 0.5389,
+ "slid_loss": 0.5374,
+ "step": 4833,
+ "time": 11.36
+ },
+ {
+ "epoch": 4.64,
+ "learning_rate": "1.0154e-04",
+ "loss": 0.5609,
+ "slid_loss": 0.5375,
+ "step": 4834,
+ "time": 12.92
+ },
+ {
+ "epoch": 4.64,
+ "learning_rate": "1.0153e-04",
+ "loss": 0.5405,
+ "slid_loss": 0.5379,
+ "step": 4835,
+ "time": 12.81
+ },
+ {
+ "epoch": 4.65,
+ "learning_rate": "1.0152e-04",
+ "loss": 0.5282,
+ "slid_loss": 0.5377,
+ "step": 4836,
+ "time": 13.6
+ },
+ {
+ "epoch": 4.65,
+ "learning_rate": "1.0152e-04",
+ "loss": 0.4853,
+ "slid_loss": 0.5367,
+ "step": 4837,
+ "time": 12.4
+ },
+ {
+ "epoch": 4.65,
+ "learning_rate": "1.0151e-04",
+ "loss": 0.4882,
+ "slid_loss": 0.5364,
+ "step": 4838,
+ "time": 11.86
+ },
+ {
+ "epoch": 4.65,
+ "learning_rate": "1.0150e-04",
+ "loss": 0.5861,
+ "slid_loss": 0.5371,
+ "step": 4839,
+ "time": 12.96
+ },
+ {
+ "epoch": 4.65,
+ "learning_rate": "1.0149e-04",
+ "loss": 0.5277,
+ "slid_loss": 0.5369,
+ "step": 4840,
+ "time": 12.24
+ },
+ {
+ "epoch": 4.65,
+ "learning_rate": "1.0148e-04",
+ "loss": 0.5788,
+ "slid_loss": 0.5368,
+ "step": 4841,
+ "time": 13.37
+ },
+ {
+ "epoch": 4.65,
+ "learning_rate": "1.0147e-04",
+ "loss": 0.5523,
+ "slid_loss": 0.5365,
+ "step": 4842,
+ "time": 12.74
+ },
+ {
+ "epoch": 4.65,
+ "learning_rate": "1.0147e-04",
+ "loss": 0.4296,
+ "slid_loss": 0.5356,
+ "step": 4843,
+ "time": 14.0
+ },
+ {
+ "epoch": 4.65,
+ "learning_rate": "1.0146e-04",
+ "loss": 0.5319,
+ "slid_loss": 0.5356,
+ "step": 4844,
+ "time": 14.01
+ },
+ {
+ "epoch": 4.65,
+ "learning_rate": "1.0145e-04",
+ "loss": 0.5942,
+ "slid_loss": 0.5367,
+ "step": 4845,
+ "time": 14.16
+ },
+ {
+ "epoch": 4.66,
+ "learning_rate": "1.0144e-04",
+ "loss": 0.5672,
+ "slid_loss": 0.5366,
+ "step": 4846,
+ "time": 11.48
+ },
+ {
+ "epoch": 4.66,
+ "learning_rate": "1.0143e-04",
+ "loss": 0.5001,
+ "slid_loss": 0.5365,
+ "step": 4847,
+ "time": 13.91
+ },
+ {
+ "epoch": 4.66,
+ "learning_rate": "1.0143e-04",
+ "loss": 0.4096,
+ "slid_loss": 0.5351,
+ "step": 4848,
+ "time": 13.3
+ },
+ {
+ "epoch": 4.66,
+ "learning_rate": "1.0142e-04",
+ "loss": 0.5526,
+ "slid_loss": 0.5353,
+ "step": 4849,
+ "time": 13.33
+ },
+ {
+ "epoch": 4.66,
+ "learning_rate": "1.0141e-04",
+ "loss": 0.4809,
+ "slid_loss": 0.5347,
+ "step": 4850,
+ "time": 13.11
+ },
+ {
+ "epoch": 4.66,
+ "learning_rate": "1.0140e-04",
+ "loss": 0.5528,
+ "slid_loss": 0.5339,
+ "step": 4851,
+ "time": 13.84
+ },
+ {
+ "epoch": 4.66,
+ "learning_rate": "1.0139e-04",
+ "loss": 0.515,
+ "slid_loss": 0.5332,
+ "step": 4852,
+ "time": 13.72
+ },
+ {
+ "epoch": 4.66,
+ "learning_rate": "1.0139e-04",
+ "loss": 0.5603,
+ "slid_loss": 0.5325,
+ "step": 4853,
+ "time": 12.76
+ },
+ {
+ "epoch": 4.66,
+ "learning_rate": "1.0138e-04",
+ "loss": 0.5929,
+ "slid_loss": 0.5323,
+ "step": 4854,
+ "time": 13.49
+ },
+ {
+ "epoch": 4.66,
+ "learning_rate": "1.0137e-04",
+ "loss": 0.5485,
+ "slid_loss": 0.5325,
+ "step": 4855,
+ "time": 13.5
+ },
+ {
+ "epoch": 4.66,
+ "learning_rate": "1.0136e-04",
+ "loss": 0.4972,
+ "slid_loss": 0.5322,
+ "step": 4856,
+ "time": 12.22
+ },
+ {
+ "epoch": 4.67,
+ "learning_rate": "1.0136e-04",
+ "loss": 0.4847,
+ "slid_loss": 0.5319,
+ "step": 4857,
+ "time": 13.96
+ },
+ {
+ "epoch": 4.67,
+ "learning_rate": "1.0135e-04",
+ "loss": 0.514,
+ "slid_loss": 0.5323,
+ "step": 4858,
+ "time": 12.94
+ },
+ {
+ "epoch": 4.67,
+ "learning_rate": "1.0134e-04",
+ "loss": 0.571,
+ "slid_loss": 0.5335,
+ "step": 4859,
+ "time": 11.86
+ },
+ {
+ "epoch": 4.67,
+ "learning_rate": "1.0133e-04",
+ "loss": 0.4894,
+ "slid_loss": 0.5335,
+ "step": 4860,
+ "time": 12.24
+ },
+ {
+ "epoch": 4.67,
+ "learning_rate": "1.0132e-04",
+ "loss": 0.4898,
+ "slid_loss": 0.5332,
+ "step": 4861,
+ "time": 13.27
+ },
+ {
+ "epoch": 4.67,
+ "learning_rate": "1.0132e-04",
+ "loss": 0.5819,
+ "slid_loss": 0.5334,
+ "step": 4862,
+ "time": 13.35
+ },
+ {
+ "epoch": 4.67,
+ "learning_rate": "1.0131e-04",
+ "loss": 0.5666,
+ "slid_loss": 0.5338,
+ "step": 4863,
+ "time": 13.4
+ },
+ {
+ "epoch": 4.67,
+ "learning_rate": "1.0130e-04",
+ "loss": 0.4971,
+ "slid_loss": 0.5337,
+ "step": 4864,
+ "time": 13.56
+ },
+ {
+ "epoch": 4.67,
+ "learning_rate": "1.0129e-04",
+ "loss": 0.5411,
+ "slid_loss": 0.5333,
+ "step": 4865,
+ "time": 12.97
+ },
+ {
+ "epoch": 4.67,
+ "learning_rate": "1.0129e-04",
+ "loss": 0.5699,
+ "slid_loss": 0.5334,
+ "step": 4866,
+ "time": 12.42
+ },
+ {
+ "epoch": 4.68,
+ "learning_rate": "1.0128e-04",
+ "loss": 0.5822,
+ "slid_loss": 0.5334,
+ "step": 4867,
+ "time": 13.66
+ },
+ {
+ "epoch": 4.68,
+ "learning_rate": "1.0127e-04",
+ "loss": 0.5203,
+ "slid_loss": 0.5325,
+ "step": 4868,
+ "time": 11.11
+ },
+ {
+ "epoch": 4.68,
+ "learning_rate": "1.0126e-04",
+ "loss": 0.5954,
+ "slid_loss": 0.5336,
+ "step": 4869,
+ "time": 12.32
+ },
+ {
+ "epoch": 4.68,
+ "learning_rate": "1.0126e-04",
+ "loss": 0.5122,
+ "slid_loss": 0.5334,
+ "step": 4870,
+ "time": 14.23
+ },
+ {
+ "epoch": 4.68,
+ "learning_rate": "1.0125e-04",
+ "loss": 0.5229,
+ "slid_loss": 0.5331,
+ "step": 4871,
+ "time": 13.52
+ },
+ {
+ "epoch": 4.68,
+ "learning_rate": "1.0124e-04",
+ "loss": 0.4339,
+ "slid_loss": 0.5319,
+ "step": 4872,
+ "time": 12.25
+ },
+ {
+ "epoch": 4.68,
+ "learning_rate": "1.0123e-04",
+ "loss": 0.5581,
+ "slid_loss": 0.5325,
+ "step": 4873,
+ "time": 13.67
+ },
+ {
+ "epoch": 4.68,
+ "learning_rate": "1.0123e-04",
+ "loss": 0.5138,
+ "slid_loss": 0.5323,
+ "step": 4874,
+ "time": 11.73
+ },
+ {
+ "epoch": 4.68,
+ "learning_rate": "1.0122e-04",
+ "loss": 0.5314,
+ "slid_loss": 0.5324,
+ "step": 4875,
+ "time": 13.42
+ },
+ {
+ "epoch": 4.68,
+ "learning_rate": "1.0121e-04",
+ "loss": 0.5048,
+ "slid_loss": 0.5319,
+ "step": 4876,
+ "time": 13.28
+ },
+ {
+ "epoch": 4.68,
+ "learning_rate": "1.0121e-04",
+ "loss": 0.5344,
+ "slid_loss": 0.5325,
+ "step": 4877,
+ "time": 13.32
+ },
+ {
+ "epoch": 4.69,
+ "learning_rate": "1.0120e-04",
+ "loss": 0.5232,
+ "slid_loss": 0.5324,
+ "step": 4878,
+ "time": 12.56
+ },
+ {
+ "epoch": 4.69,
+ "learning_rate": "1.0119e-04",
+ "loss": 0.5407,
+ "slid_loss": 0.5318,
+ "step": 4879,
+ "time": 11.73
+ },
+ {
+ "epoch": 4.69,
+ "learning_rate": "1.0118e-04",
+ "loss": 0.5886,
+ "slid_loss": 0.533,
+ "step": 4880,
+ "time": 13.38
+ },
+ {
+ "epoch": 4.69,
+ "learning_rate": "1.0118e-04",
+ "loss": 0.5512,
+ "slid_loss": 0.5333,
+ "step": 4881,
+ "time": 13.77
+ },
+ {
+ "epoch": 4.69,
+ "learning_rate": "1.0117e-04",
+ "loss": 0.5039,
+ "slid_loss": 0.5331,
+ "step": 4882,
+ "time": 12.24
+ },
+ {
+ "epoch": 4.69,
+ "learning_rate": "1.0116e-04",
+ "loss": 0.512,
+ "slid_loss": 0.5328,
+ "step": 4883,
+ "time": 13.28
+ },
+ {
+ "epoch": 4.69,
+ "learning_rate": "1.0115e-04",
+ "loss": 0.4934,
+ "slid_loss": 0.5323,
+ "step": 4884,
+ "time": 13.69
+ },
+ {
+ "epoch": 4.69,
+ "learning_rate": "1.0115e-04",
+ "loss": 0.6205,
+ "slid_loss": 0.5329,
+ "step": 4885,
+ "time": 12.69
+ },
+ {
+ "epoch": 4.69,
+ "learning_rate": "1.0114e-04",
+ "loss": 0.4623,
+ "slid_loss": 0.5327,
+ "step": 4886,
+ "time": 11.34
+ },
+ {
+ "epoch": 4.69,
+ "learning_rate": "1.0113e-04",
+ "loss": 0.55,
+ "slid_loss": 0.5333,
+ "step": 4887,
+ "time": 14.09
+ },
+ {
+ "epoch": 4.7,
+ "learning_rate": "1.0113e-04",
+ "loss": 0.5231,
+ "slid_loss": 0.5329,
+ "step": 4888,
+ "time": 13.74
+ },
+ {
+ "epoch": 4.7,
+ "learning_rate": "1.0112e-04",
+ "loss": 0.5807,
+ "slid_loss": 0.5341,
+ "step": 4889,
+ "time": 12.74
+ },
+ {
+ "epoch": 4.7,
+ "learning_rate": "1.0111e-04",
+ "loss": 0.5082,
+ "slid_loss": 0.5344,
+ "step": 4890,
+ "time": 12.84
+ },
+ {
+ "epoch": 4.7,
+ "learning_rate": "1.0110e-04",
+ "loss": 0.5107,
+ "slid_loss": 0.5341,
+ "step": 4891,
+ "time": 11.39
+ },
+ {
+ "epoch": 4.7,
+ "learning_rate": "1.0110e-04",
+ "loss": 0.5093,
+ "slid_loss": 0.5333,
+ "step": 4892,
+ "time": 13.93
+ },
+ {
+ "epoch": 4.7,
+ "learning_rate": "1.0109e-04",
+ "loss": 0.5792,
+ "slid_loss": 0.5335,
+ "step": 4893,
+ "time": 12.84
+ },
+ {
+ "epoch": 4.7,
+ "learning_rate": "1.0108e-04",
+ "loss": 0.5227,
+ "slid_loss": 0.5338,
+ "step": 4894,
+ "time": 13.51
+ },
+ {
+ "epoch": 4.7,
+ "learning_rate": "1.0108e-04",
+ "loss": 0.5081,
+ "slid_loss": 0.5329,
+ "step": 4895,
+ "time": 13.11
+ },
+ {
+ "epoch": 4.7,
+ "learning_rate": "1.0107e-04",
+ "loss": 0.5707,
+ "slid_loss": 0.5342,
+ "step": 4896,
+ "time": 13.45
+ },
+ {
+ "epoch": 4.7,
+ "learning_rate": "1.0106e-04",
+ "loss": 0.5116,
+ "slid_loss": 0.5335,
+ "step": 4897,
+ "time": 12.84
+ },
+ {
+ "epoch": 4.71,
+ "learning_rate": "1.0106e-04",
+ "loss": 0.5792,
+ "slid_loss": 0.5338,
+ "step": 4898,
+ "time": 13.76
+ },
+ {
+ "epoch": 4.71,
+ "learning_rate": "1.0105e-04",
+ "loss": 0.5284,
+ "slid_loss": 0.5335,
+ "step": 4899,
+ "time": 11.69
+ },
+ {
+ "epoch": 4.71,
+ "learning_rate": "1.0104e-04",
+ "loss": 0.4811,
+ "slid_loss": 0.5327,
+ "step": 4900,
+ "time": 13.39
+ },
+ {
+ "epoch": 4.71,
+ "learning_rate": "1.0104e-04",
+ "loss": 0.5504,
+ "slid_loss": 0.5328,
+ "step": 4901,
+ "time": 13.78
+ },
+ {
+ "epoch": 4.71,
+ "learning_rate": "1.0103e-04",
+ "loss": 0.5991,
+ "slid_loss": 0.5335,
+ "step": 4902,
+ "time": 13.41
+ },
+ {
+ "epoch": 4.71,
+ "learning_rate": "1.0102e-04",
+ "loss": 0.5009,
+ "slid_loss": 0.5337,
+ "step": 4903,
+ "time": 12.93
+ },
+ {
+ "epoch": 4.71,
+ "learning_rate": "1.0102e-04",
+ "loss": 0.5484,
+ "slid_loss": 0.5337,
+ "step": 4904,
+ "time": 13.36
+ },
+ {
+ "epoch": 4.71,
+ "learning_rate": "1.0101e-04",
+ "loss": 0.5095,
+ "slid_loss": 0.534,
+ "step": 4905,
+ "time": 12.86
+ },
+ {
+ "epoch": 4.71,
+ "learning_rate": "1.0100e-04",
+ "loss": 0.4432,
+ "slid_loss": 0.5336,
+ "step": 4906,
+ "time": 13.21
+ },
+ {
+ "epoch": 4.71,
+ "learning_rate": "1.0100e-04",
+ "loss": 0.549,
+ "slid_loss": 0.5335,
+ "step": 4907,
+ "time": 13.43
+ },
+ {
+ "epoch": 4.71,
+ "learning_rate": "1.0099e-04",
+ "loss": 0.5684,
+ "slid_loss": 0.5339,
+ "step": 4908,
+ "time": 13.35
+ },
+ {
+ "epoch": 4.72,
+ "learning_rate": "1.0098e-04",
+ "loss": 0.5753,
+ "slid_loss": 0.5342,
+ "step": 4909,
+ "time": 13.7
+ },
+ {
+ "epoch": 4.72,
+ "learning_rate": "1.0098e-04",
+ "loss": 0.4767,
+ "slid_loss": 0.534,
+ "step": 4910,
+ "time": 14.32
+ },
+ {
+ "epoch": 4.72,
+ "learning_rate": "1.0097e-04",
+ "loss": 0.5007,
+ "slid_loss": 0.5343,
+ "step": 4911,
+ "time": 13.72
+ },
+ {
+ "epoch": 4.72,
+ "learning_rate": "1.0096e-04",
+ "loss": 0.5482,
+ "slid_loss": 0.5351,
+ "step": 4912,
+ "time": 12.89
+ },
+ {
+ "epoch": 4.72,
+ "learning_rate": "1.0096e-04",
+ "loss": 0.5575,
+ "slid_loss": 0.5353,
+ "step": 4913,
+ "time": 13.76
+ },
+ {
+ "epoch": 4.72,
+ "learning_rate": "1.0095e-04",
+ "loss": 0.5232,
+ "slid_loss": 0.5349,
+ "step": 4914,
+ "time": 13.28
+ },
+ {
+ "epoch": 4.72,
+ "learning_rate": "1.0094e-04",
+ "loss": 0.4772,
+ "slid_loss": 0.534,
+ "step": 4915,
+ "time": 13.11
+ },
+ {
+ "epoch": 4.72,
+ "learning_rate": "1.0094e-04",
+ "loss": 0.537,
+ "slid_loss": 0.5336,
+ "step": 4916,
+ "time": 13.13
+ },
+ {
+ "epoch": 4.72,
+ "learning_rate": "1.0093e-04",
+ "loss": 0.4641,
+ "slid_loss": 0.5336,
+ "step": 4917,
+ "time": 13.95
+ },
+ {
+ "epoch": 4.72,
+ "learning_rate": "1.0092e-04",
+ "loss": 0.5912,
+ "slid_loss": 0.5335,
+ "step": 4918,
+ "time": 13.0
+ },
+ {
+ "epoch": 4.73,
+ "learning_rate": "1.0092e-04",
+ "loss": 0.5061,
+ "slid_loss": 0.5331,
+ "step": 4919,
+ "time": 13.0
+ },
+ {
+ "epoch": 4.73,
+ "learning_rate": "1.0091e-04",
+ "loss": 0.6186,
+ "slid_loss": 0.5339,
+ "step": 4920,
+ "time": 13.18
+ },
+ {
+ "epoch": 4.73,
+ "learning_rate": "1.0090e-04",
+ "loss": 0.4853,
+ "slid_loss": 0.5323,
+ "step": 4921,
+ "time": 13.2
+ },
+ {
+ "epoch": 4.73,
+ "learning_rate": "1.0090e-04",
+ "loss": 0.5163,
+ "slid_loss": 0.5321,
+ "step": 4922,
+ "time": 13.03
+ },
+ {
+ "epoch": 4.73,
+ "learning_rate": "1.0089e-04",
+ "loss": 0.5821,
+ "slid_loss": 0.5322,
+ "step": 4923,
+ "time": 13.26
+ },
+ {
+ "epoch": 4.73,
+ "learning_rate": "1.0089e-04",
+ "loss": 0.5226,
+ "slid_loss": 0.5322,
+ "step": 4924,
+ "time": 11.58
+ },
+ {
+ "epoch": 4.73,
+ "learning_rate": "1.0088e-04",
+ "loss": 0.5353,
+ "slid_loss": 0.5315,
+ "step": 4925,
+ "time": 13.31
+ },
+ {
+ "epoch": 4.73,
+ "learning_rate": "1.0087e-04",
+ "loss": 0.5967,
+ "slid_loss": 0.532,
+ "step": 4926,
+ "time": 12.73
+ },
+ {
+ "epoch": 4.73,
+ "learning_rate": "1.0087e-04",
+ "loss": 0.5595,
+ "slid_loss": 0.5322,
+ "step": 4927,
+ "time": 12.78
+ },
+ {
+ "epoch": 4.73,
+ "learning_rate": "1.0086e-04",
+ "loss": 0.4512,
+ "slid_loss": 0.5322,
+ "step": 4928,
+ "time": 11.97
+ },
+ {
+ "epoch": 4.73,
+ "learning_rate": "1.0085e-04",
+ "loss": 0.5241,
+ "slid_loss": 0.5325,
+ "step": 4929,
+ "time": 13.11
+ },
+ {
+ "epoch": 4.74,
+ "learning_rate": "1.0085e-04",
+ "loss": 0.4569,
+ "slid_loss": 0.5313,
+ "step": 4930,
+ "time": 12.3
+ },
+ {
+ "epoch": 4.74,
+ "learning_rate": "1.0084e-04",
+ "loss": 0.5759,
+ "slid_loss": 0.5316,
+ "step": 4931,
+ "time": 13.29
+ },
+ {
+ "epoch": 4.74,
+ "learning_rate": "1.0084e-04",
+ "loss": 0.5114,
+ "slid_loss": 0.5305,
+ "step": 4932,
+ "time": 12.68
+ },
+ {
+ "epoch": 4.74,
+ "learning_rate": "1.0083e-04",
+ "loss": 0.4793,
+ "slid_loss": 0.53,
+ "step": 4933,
+ "time": 13.24
+ },
+ {
+ "epoch": 4.74,
+ "learning_rate": "1.0082e-04",
+ "loss": 0.5702,
+ "slid_loss": 0.53,
+ "step": 4934,
+ "time": 13.43
+ },
+ {
+ "epoch": 4.74,
+ "learning_rate": "1.0082e-04",
+ "loss": 0.4865,
+ "slid_loss": 0.5295,
+ "step": 4935,
+ "time": 13.84
+ },
+ {
+ "epoch": 4.74,
+ "learning_rate": "1.0081e-04",
+ "loss": 0.5044,
+ "slid_loss": 0.5293,
+ "step": 4936,
+ "time": 13.33
+ },
+ {
+ "epoch": 4.74,
+ "learning_rate": "1.0081e-04",
+ "loss": 0.594,
+ "slid_loss": 0.5304,
+ "step": 4937,
+ "time": 11.58
+ },
+ {
+ "epoch": 4.74,
+ "learning_rate": "1.0080e-04",
+ "loss": 0.5227,
+ "slid_loss": 0.5307,
+ "step": 4938,
+ "time": 14.01
+ },
+ {
+ "epoch": 4.74,
+ "learning_rate": "1.0079e-04",
+ "loss": 0.602,
+ "slid_loss": 0.5309,
+ "step": 4939,
+ "time": 12.47
+ },
+ {
+ "epoch": 4.75,
+ "learning_rate": "1.0079e-04",
+ "loss": 0.502,
+ "slid_loss": 0.5306,
+ "step": 4940,
+ "time": 12.3
+ },
+ {
+ "epoch": 4.75,
+ "learning_rate": "1.0078e-04",
+ "loss": 0.4923,
+ "slid_loss": 0.5297,
+ "step": 4941,
+ "time": 11.43
+ },
+ {
+ "epoch": 4.75,
+ "learning_rate": "1.0078e-04",
+ "loss": 0.4616,
+ "slid_loss": 0.5288,
+ "step": 4942,
+ "time": 13.39
+ },
+ {
+ "epoch": 4.75,
+ "learning_rate": "1.0077e-04",
+ "loss": 0.5903,
+ "slid_loss": 0.5304,
+ "step": 4943,
+ "time": 14.1
+ },
+ {
+ "epoch": 4.75,
+ "learning_rate": "1.0076e-04",
+ "loss": 0.4661,
+ "slid_loss": 0.5298,
+ "step": 4944,
+ "time": 12.82
+ },
+ {
+ "epoch": 4.75,
+ "learning_rate": "1.0076e-04",
+ "loss": 0.53,
+ "slid_loss": 0.5291,
+ "step": 4945,
+ "time": 14.18
+ },
+ {
+ "epoch": 4.75,
+ "learning_rate": "1.0075e-04",
+ "loss": 0.54,
+ "slid_loss": 0.5289,
+ "step": 4946,
+ "time": 13.48
+ },
+ {
+ "epoch": 4.75,
+ "learning_rate": "1.0075e-04",
+ "loss": 0.5015,
+ "slid_loss": 0.5289,
+ "step": 4947,
+ "time": 12.28
+ },
+ {
+ "epoch": 4.75,
+ "learning_rate": "1.0074e-04",
+ "loss": 0.4486,
+ "slid_loss": 0.5293,
+ "step": 4948,
+ "time": 13.25
+ },
+ {
+ "epoch": 4.75,
+ "learning_rate": "1.0074e-04",
+ "loss": 0.5545,
+ "slid_loss": 0.5293,
+ "step": 4949,
+ "time": 12.17
+ },
+ {
+ "epoch": 4.76,
+ "learning_rate": "1.0073e-04",
+ "loss": 0.5359,
+ "slid_loss": 0.5298,
+ "step": 4950,
+ "time": 10.97
+ },
+ {
+ "epoch": 4.76,
+ "learning_rate": "1.0072e-04",
+ "loss": 0.5002,
+ "slid_loss": 0.5293,
+ "step": 4951,
+ "time": 11.92
+ },
+ {
+ "epoch": 4.76,
+ "learning_rate": "1.0072e-04",
+ "loss": 0.5385,
+ "slid_loss": 0.5295,
+ "step": 4952,
+ "time": 13.02
+ },
+ {
+ "epoch": 4.76,
+ "learning_rate": "1.0071e-04",
+ "loss": 0.5919,
+ "slid_loss": 0.5299,
+ "step": 4953,
+ "time": 13.86
+ },
+ {
+ "epoch": 4.76,
+ "learning_rate": "1.0071e-04",
+ "loss": 0.5022,
+ "slid_loss": 0.529,
+ "step": 4954,
+ "time": 11.37
+ },
+ {
+ "epoch": 4.76,
+ "learning_rate": "1.0070e-04",
+ "loss": 0.5545,
+ "slid_loss": 0.529,
+ "step": 4955,
+ "time": 13.29
+ },
+ {
+ "epoch": 4.76,
+ "learning_rate": "1.0070e-04",
+ "loss": 0.4945,
+ "slid_loss": 0.529,
+ "step": 4956,
+ "time": 13.47
+ },
+ {
+ "epoch": 4.76,
+ "learning_rate": "1.0069e-04",
+ "loss": 0.5483,
+ "slid_loss": 0.5296,
+ "step": 4957,
+ "time": 14.25
+ },
+ {
+ "epoch": 4.76,
+ "learning_rate": "1.0068e-04",
+ "loss": 0.5457,
+ "slid_loss": 0.5299,
+ "step": 4958,
+ "time": 13.12
+ },
+ {
+ "epoch": 4.76,
+ "learning_rate": "1.0068e-04",
+ "loss": 0.521,
+ "slid_loss": 0.5294,
+ "step": 4959,
+ "time": 11.75
+ },
+ {
+ "epoch": 4.76,
+ "learning_rate": "1.0067e-04",
+ "loss": 0.4782,
+ "slid_loss": 0.5293,
+ "step": 4960,
+ "time": 11.94
+ },
+ {
+ "epoch": 4.77,
+ "learning_rate": "1.0067e-04",
+ "loss": 0.5723,
+ "slid_loss": 0.5302,
+ "step": 4961,
+ "time": 13.05
+ },
+ {
+ "epoch": 4.77,
+ "learning_rate": "1.0066e-04",
+ "loss": 0.4774,
+ "slid_loss": 0.5291,
+ "step": 4962,
+ "time": 14.26
+ },
+ {
+ "epoch": 4.77,
+ "learning_rate": "1.0066e-04",
+ "loss": 0.4538,
+ "slid_loss": 0.528,
+ "step": 4963,
+ "time": 12.5
+ },
+ {
+ "epoch": 4.77,
+ "learning_rate": "1.0065e-04",
+ "loss": 0.5827,
+ "slid_loss": 0.5288,
+ "step": 4964,
+ "time": 14.75
+ },
+ {
+ "epoch": 4.77,
+ "learning_rate": "1.0065e-04",
+ "loss": 0.548,
+ "slid_loss": 0.5289,
+ "step": 4965,
+ "time": 13.23
+ },
+ {
+ "epoch": 4.77,
+ "learning_rate": "1.0064e-04",
+ "loss": 0.5588,
+ "slid_loss": 0.5288,
+ "step": 4966,
+ "time": 11.47
+ },
+ {
+ "epoch": 4.77,
+ "learning_rate": "1.0064e-04",
+ "loss": 0.4564,
+ "slid_loss": 0.5275,
+ "step": 4967,
+ "time": 12.19
+ },
+ {
+ "epoch": 4.77,
+ "learning_rate": "1.0063e-04",
+ "loss": 0.4801,
+ "slid_loss": 0.5271,
+ "step": 4968,
+ "time": 12.62
+ },
+ {
+ "epoch": 4.77,
+ "learning_rate": "1.0063e-04",
+ "loss": 0.5736,
+ "slid_loss": 0.5269,
+ "step": 4969,
+ "time": 12.25
+ },
+ {
+ "epoch": 4.77,
+ "learning_rate": "1.0062e-04",
+ "loss": 0.5115,
+ "slid_loss": 0.5269,
+ "step": 4970,
+ "time": 10.91
+ },
+ {
+ "epoch": 4.78,
+ "learning_rate": "1.0061e-04",
+ "loss": 0.4462,
+ "slid_loss": 0.5261,
+ "step": 4971,
+ "time": 12.88
+ },
+ {
+ "epoch": 4.78,
+ "learning_rate": "1.0061e-04",
+ "loss": 0.5051,
+ "slid_loss": 0.5269,
+ "step": 4972,
+ "time": 11.2
+ },
+ {
+ "epoch": 4.78,
+ "learning_rate": "1.0060e-04",
+ "loss": 0.506,
+ "slid_loss": 0.5263,
+ "step": 4973,
+ "time": 13.81
+ },
+ {
+ "epoch": 4.78,
+ "learning_rate": "1.0060e-04",
+ "loss": 0.5347,
+ "slid_loss": 0.5265,
+ "step": 4974,
+ "time": 12.94
+ },
+ {
+ "epoch": 4.78,
+ "learning_rate": "1.0059e-04",
+ "loss": 0.5731,
+ "slid_loss": 0.527,
+ "step": 4975,
+ "time": 13.23
+ },
+ {
+ "epoch": 4.78,
+ "learning_rate": "1.0059e-04",
+ "loss": 0.5489,
+ "slid_loss": 0.5274,
+ "step": 4976,
+ "time": 13.95
+ },
+ {
+ "epoch": 4.78,
+ "learning_rate": "1.0058e-04",
+ "loss": 0.5049,
+ "slid_loss": 0.5271,
+ "step": 4977,
+ "time": 13.64
+ },
+ {
+ "epoch": 4.78,
+ "learning_rate": "1.0058e-04",
+ "loss": 0.4651,
+ "slid_loss": 0.5265,
+ "step": 4978,
+ "time": 12.82
+ },
+ {
+ "epoch": 4.78,
+ "learning_rate": "1.0057e-04",
+ "loss": 0.4791,
+ "slid_loss": 0.5259,
+ "step": 4979,
+ "time": 13.71
+ },
+ {
+ "epoch": 4.78,
+ "learning_rate": "1.0057e-04",
+ "loss": 0.5595,
+ "slid_loss": 0.5256,
+ "step": 4980,
+ "time": 11.62
+ },
+ {
+ "epoch": 4.78,
+ "learning_rate": "1.0056e-04",
+ "loss": 0.4834,
+ "slid_loss": 0.5249,
+ "step": 4981,
+ "time": 13.16
+ },
+ {
+ "epoch": 4.79,
+ "learning_rate": "1.0056e-04",
+ "loss": 0.5057,
+ "slid_loss": 0.525,
+ "step": 4982,
+ "time": 14.6
+ },
+ {
+ "epoch": 4.79,
+ "learning_rate": "1.0055e-04",
+ "loss": 0.508,
+ "slid_loss": 0.5249,
+ "step": 4983,
+ "time": 11.67
+ },
+ {
+ "epoch": 4.79,
+ "learning_rate": "1.0055e-04",
+ "loss": 0.5353,
+ "slid_loss": 0.5253,
+ "step": 4984,
+ "time": 13.41
+ },
+ {
+ "epoch": 4.79,
+ "learning_rate": "1.0054e-04",
+ "loss": 0.5552,
+ "slid_loss": 0.5247,
+ "step": 4985,
+ "time": 11.68
+ },
+ {
+ "epoch": 4.79,
+ "learning_rate": "1.0054e-04",
+ "loss": 0.5229,
+ "slid_loss": 0.5253,
+ "step": 4986,
+ "time": 13.08
+ },
+ {
+ "epoch": 4.79,
+ "learning_rate": "1.0053e-04",
+ "loss": 0.442,
+ "slid_loss": 0.5242,
+ "step": 4987,
+ "time": 12.49
+ },
+ {
+ "epoch": 4.79,
+ "learning_rate": "1.0053e-04",
+ "loss": 0.4588,
+ "slid_loss": 0.5236,
+ "step": 4988,
+ "time": 12.28
+ },
+ {
+ "epoch": 4.79,
+ "learning_rate": "1.0052e-04",
+ "loss": 0.5047,
+ "slid_loss": 0.5228,
+ "step": 4989,
+ "time": 13.64
+ },
+ {
+ "epoch": 4.79,
+ "learning_rate": "1.0052e-04",
+ "loss": 0.42,
+ "slid_loss": 0.5219,
+ "step": 4990,
+ "time": 11.25
+ },
+ {
+ "epoch": 4.79,
+ "learning_rate": "1.0051e-04",
+ "loss": 0.4105,
+ "slid_loss": 0.5209,
+ "step": 4991,
+ "time": 14.5
+ },
+ {
+ "epoch": 4.8,
+ "learning_rate": "1.0051e-04",
+ "loss": 0.5074,
+ "slid_loss": 0.5209,
+ "step": 4992,
+ "time": 11.67
+ },
+ {
+ "epoch": 4.8,
+ "learning_rate": "1.0050e-04",
+ "loss": 0.5861,
+ "slid_loss": 0.521,
+ "step": 4993,
+ "time": 13.23
+ },
+ {
+ "epoch": 4.8,
+ "learning_rate": "1.0050e-04",
+ "loss": 0.5762,
+ "slid_loss": 0.5215,
+ "step": 4994,
+ "time": 14.38
+ },
+ {
+ "epoch": 4.8,
+ "learning_rate": "1.0050e-04",
+ "loss": 0.492,
+ "slid_loss": 0.5214,
+ "step": 4995,
+ "time": 13.34
+ },
+ {
+ "epoch": 4.8,
+ "learning_rate": "1.0049e-04",
+ "loss": 0.5323,
+ "slid_loss": 0.521,
+ "step": 4996,
+ "time": 13.97
+ },
+ {
+ "epoch": 4.8,
+ "learning_rate": "1.0049e-04",
+ "loss": 0.4007,
+ "slid_loss": 0.5199,
+ "step": 4997,
+ "time": 13.97
+ },
+ {
+ "epoch": 4.8,
+ "learning_rate": "1.0048e-04",
+ "loss": 0.4999,
+ "slid_loss": 0.5191,
+ "step": 4998,
+ "time": 13.34
+ },
+ {
+ "epoch": 4.8,
+ "learning_rate": "1.0048e-04",
+ "loss": 0.5152,
+ "slid_loss": 0.5189,
+ "step": 4999,
+ "time": 13.3
+ },
+ {
+ "epoch": 4.8,
+ "learning_rate": "1.0047e-04",
+ "loss": 0.4752,
+ "slid_loss": 0.5189,
+ "step": 5000,
+ "time": 13.97
+ },
+ {
+ "epoch": 4.8,
+ "learning_rate": "1.0047e-04",
+ "loss": 0.52,
+ "slid_loss": 0.5186,
+ "step": 5001,
+ "time": 12.14
+ },
+ {
+ "epoch": 4.8,
+ "learning_rate": "1.0046e-04",
+ "loss": 0.4685,
+ "slid_loss": 0.5173,
+ "step": 5002,
+ "time": 13.36
+ },
+ {
+ "epoch": 4.81,
+ "learning_rate": "1.0046e-04",
+ "loss": 0.4753,
+ "slid_loss": 0.517,
+ "step": 5003,
+ "time": 13.79
+ },
+ {
+ "epoch": 4.81,
+ "learning_rate": "1.0045e-04",
+ "loss": 0.5331,
+ "slid_loss": 0.5169,
+ "step": 5004,
+ "time": 13.6
+ },
+ {
+ "epoch": 4.81,
+ "learning_rate": "1.0045e-04",
+ "loss": 0.538,
+ "slid_loss": 0.5171,
+ "step": 5005,
+ "time": 13.68
+ },
+ {
+ "epoch": 4.81,
+ "learning_rate": "1.0044e-04",
+ "loss": 0.4631,
+ "slid_loss": 0.5173,
+ "step": 5006,
+ "time": 11.33
+ },
+ {
+ "epoch": 4.81,
+ "learning_rate": "1.0044e-04",
+ "loss": 0.4972,
+ "slid_loss": 0.5168,
+ "step": 5007,
+ "time": 14.04
+ },
+ {
+ "epoch": 4.81,
+ "learning_rate": "1.0044e-04",
+ "loss": 0.5314,
+ "slid_loss": 0.5165,
+ "step": 5008,
+ "time": 13.69
+ },
+ {
+ "epoch": 4.81,
+ "learning_rate": "1.0043e-04",
+ "loss": 0.5263,
+ "slid_loss": 0.516,
+ "step": 5009,
+ "time": 13.11
+ },
+ {
+ "epoch": 4.81,
+ "learning_rate": "1.0043e-04",
+ "loss": 0.5838,
+ "slid_loss": 0.517,
+ "step": 5010,
+ "time": 12.96
+ },
+ {
+ "epoch": 4.81,
+ "learning_rate": "1.0042e-04",
+ "loss": 0.4748,
+ "slid_loss": 0.5168,
+ "step": 5011,
+ "time": 14.06
+ },
+ {
+ "epoch": 4.81,
+ "learning_rate": "1.0042e-04",
+ "loss": 0.6314,
+ "slid_loss": 0.5176,
+ "step": 5012,
+ "time": 14.53
+ },
+ {
+ "epoch": 4.82,
+ "learning_rate": "1.0041e-04",
+ "loss": 0.4864,
+ "slid_loss": 0.5169,
+ "step": 5013,
+ "time": 12.7
+ },
+ {
+ "epoch": 4.82,
+ "learning_rate": "1.0041e-04",
+ "loss": 0.5547,
+ "slid_loss": 0.5172,
+ "step": 5014,
+ "time": 12.79
+ },
+ {
+ "epoch": 4.82,
+ "learning_rate": "1.0041e-04",
+ "loss": 0.5734,
+ "slid_loss": 0.5182,
+ "step": 5015,
+ "time": 11.16
+ },
+ {
+ "epoch": 4.82,
+ "learning_rate": "1.0040e-04",
+ "loss": 0.459,
+ "slid_loss": 0.5174,
+ "step": 5016,
+ "time": 12.83
+ },
+ {
+ "epoch": 4.82,
+ "learning_rate": "1.0040e-04",
+ "loss": 0.5851,
+ "slid_loss": 0.5186,
+ "step": 5017,
+ "time": 13.18
+ },
+ {
+ "epoch": 4.82,
+ "learning_rate": "1.0039e-04",
+ "loss": 0.5792,
+ "slid_loss": 0.5185,
+ "step": 5018,
+ "time": 13.14
+ },
+ {
+ "epoch": 4.82,
+ "learning_rate": "1.0039e-04",
+ "loss": 0.5419,
+ "slid_loss": 0.5188,
+ "step": 5019,
+ "time": 12.4
+ },
+ {
+ "epoch": 4.82,
+ "learning_rate": "1.0038e-04",
+ "loss": 0.4976,
+ "slid_loss": 0.5176,
+ "step": 5020,
+ "time": 11.58
+ },
+ {
+ "epoch": 4.82,
+ "learning_rate": "1.0038e-04",
+ "loss": 0.4912,
+ "slid_loss": 0.5177,
+ "step": 5021,
+ "time": 12.3
+ },
+ {
+ "epoch": 4.82,
+ "learning_rate": "1.0038e-04",
+ "loss": 0.4912,
+ "slid_loss": 0.5174,
+ "step": 5022,
+ "time": 14.24
+ },
+ {
+ "epoch": 4.83,
+ "learning_rate": "1.0037e-04",
+ "loss": 0.4749,
+ "slid_loss": 0.5164,
+ "step": 5023,
+ "time": 13.93
+ },
+ {
+ "epoch": 4.83,
+ "learning_rate": "1.0037e-04",
+ "loss": 0.5367,
+ "slid_loss": 0.5165,
+ "step": 5024,
+ "time": 13.37
+ },
+ {
+ "epoch": 4.83,
+ "learning_rate": "1.0036e-04",
+ "loss": 0.4902,
+ "slid_loss": 0.5161,
+ "step": 5025,
+ "time": 12.87
+ },
+ {
+ "epoch": 4.83,
+ "learning_rate": "1.0036e-04",
+ "loss": 0.5039,
+ "slid_loss": 0.5151,
+ "step": 5026,
+ "time": 11.89
+ },
+ {
+ "epoch": 4.83,
+ "learning_rate": "1.0036e-04",
+ "loss": 0.5113,
+ "slid_loss": 0.5147,
+ "step": 5027,
+ "time": 10.8
+ },
+ {
+ "epoch": 4.83,
+ "learning_rate": "1.0035e-04",
+ "loss": 0.4677,
+ "slid_loss": 0.5148,
+ "step": 5028,
+ "time": 13.5
+ },
+ {
+ "epoch": 4.83,
+ "learning_rate": "1.0035e-04",
+ "loss": 0.5348,
+ "slid_loss": 0.5149,
+ "step": 5029,
+ "time": 12.88
+ },
+ {
+ "epoch": 4.83,
+ "learning_rate": "1.0034e-04",
+ "loss": 0.4847,
+ "slid_loss": 0.5152,
+ "step": 5030,
+ "time": 11.0
+ },
+ {
+ "epoch": 4.83,
+ "learning_rate": "1.0034e-04",
+ "loss": 0.4739,
+ "slid_loss": 0.5142,
+ "step": 5031,
+ "time": 12.03
+ },
+ {
+ "epoch": 4.83,
+ "learning_rate": "1.0034e-04",
+ "loss": 0.5386,
+ "slid_loss": 0.5145,
+ "step": 5032,
+ "time": 13.71
+ },
+ {
+ "epoch": 4.83,
+ "learning_rate": "1.0033e-04",
+ "loss": 0.5103,
+ "slid_loss": 0.5148,
+ "step": 5033,
+ "time": 13.73
+ },
+ {
+ "epoch": 4.84,
+ "learning_rate": "1.0033e-04",
+ "loss": 0.5369,
+ "slid_loss": 0.5144,
+ "step": 5034,
+ "time": 13.48
+ },
+ {
+ "epoch": 4.84,
+ "learning_rate": "1.0032e-04",
+ "loss": 0.482,
+ "slid_loss": 0.5144,
+ "step": 5035,
+ "time": 13.77
+ },
+ {
+ "epoch": 4.84,
+ "learning_rate": "1.0032e-04",
+ "loss": 0.5149,
+ "slid_loss": 0.5145,
+ "step": 5036,
+ "time": 13.42
+ },
+ {
+ "epoch": 4.84,
+ "learning_rate": "1.0032e-04",
+ "loss": 0.4198,
+ "slid_loss": 0.5127,
+ "step": 5037,
+ "time": 13.95
+ },
+ {
+ "epoch": 4.84,
+ "learning_rate": "1.0031e-04",
+ "loss": 0.4854,
+ "slid_loss": 0.5124,
+ "step": 5038,
+ "time": 14.0
+ },
+ {
+ "epoch": 4.84,
+ "learning_rate": "1.0031e-04",
+ "loss": 0.5228,
+ "slid_loss": 0.5116,
+ "step": 5039,
+ "time": 13.71
+ },
+ {
+ "epoch": 4.84,
+ "learning_rate": "1.0031e-04",
+ "loss": 0.4927,
+ "slid_loss": 0.5115,
+ "step": 5040,
+ "time": 11.13
+ },
+ {
+ "epoch": 4.84,
+ "learning_rate": "1.0030e-04",
+ "loss": 0.4716,
+ "slid_loss": 0.5113,
+ "step": 5041,
+ "time": 11.68
+ },
+ {
+ "epoch": 4.84,
+ "learning_rate": "1.0030e-04",
+ "loss": 0.4168,
+ "slid_loss": 0.5108,
+ "step": 5042,
+ "time": 13.8
+ },
+ {
+ "epoch": 4.84,
+ "learning_rate": "1.0029e-04",
+ "loss": 0.4384,
+ "slid_loss": 0.5093,
+ "step": 5043,
+ "time": 13.69
+ },
+ {
+ "epoch": 4.85,
+ "learning_rate": "1.0029e-04",
+ "loss": 0.4708,
+ "slid_loss": 0.5094,
+ "step": 5044,
+ "time": 13.0
+ },
+ {
+ "epoch": 4.85,
+ "learning_rate": "1.0029e-04",
+ "loss": 0.6018,
+ "slid_loss": 0.5101,
+ "step": 5045,
+ "time": 13.72
+ },
+ {
+ "epoch": 4.85,
+ "learning_rate": "1.0028e-04",
+ "loss": 0.4657,
+ "slid_loss": 0.5093,
+ "step": 5046,
+ "time": 13.71
+ },
+ {
+ "epoch": 4.85,
+ "learning_rate": "1.0028e-04",
+ "loss": 0.6058,
+ "slid_loss": 0.5104,
+ "step": 5047,
+ "time": 13.7
+ },
+ {
+ "epoch": 4.85,
+ "learning_rate": "1.0028e-04",
+ "loss": 0.5254,
+ "slid_loss": 0.5112,
+ "step": 5048,
+ "time": 13.37
+ },
+ {
+ "epoch": 4.85,
+ "learning_rate": "1.0027e-04",
+ "loss": 0.477,
+ "slid_loss": 0.5104,
+ "step": 5049,
+ "time": 12.24
+ },
+ {
+ "epoch": 4.85,
+ "learning_rate": "1.0027e-04",
+ "loss": 0.5665,
+ "slid_loss": 0.5107,
+ "step": 5050,
+ "time": 13.77
+ },
+ {
+ "epoch": 4.85,
+ "learning_rate": "1.0027e-04",
+ "loss": 0.4572,
+ "slid_loss": 0.5103,
+ "step": 5051,
+ "time": 13.0
+ },
+ {
+ "epoch": 4.85,
+ "learning_rate": "1.0026e-04",
+ "loss": 0.5233,
+ "slid_loss": 0.5101,
+ "step": 5052,
+ "time": 13.28
+ },
+ {
+ "epoch": 4.85,
+ "learning_rate": "1.0026e-04",
+ "loss": 0.5242,
+ "slid_loss": 0.5094,
+ "step": 5053,
+ "time": 13.53
+ },
+ {
+ "epoch": 4.85,
+ "learning_rate": "1.0026e-04",
+ "loss": 0.5386,
+ "slid_loss": 0.5098,
+ "step": 5054,
+ "time": 13.82
+ },
+ {
+ "epoch": 4.86,
+ "learning_rate": "1.0025e-04",
+ "loss": 0.5242,
+ "slid_loss": 0.5095,
+ "step": 5055,
+ "time": 13.34
+ },
+ {
+ "epoch": 4.86,
+ "learning_rate": "1.0025e-04",
+ "loss": 0.521,
+ "slid_loss": 0.5097,
+ "step": 5056,
+ "time": 12.53
+ },
+ {
+ "epoch": 4.86,
+ "learning_rate": "1.0025e-04",
+ "loss": 0.4955,
+ "slid_loss": 0.5092,
+ "step": 5057,
+ "time": 12.58
+ },
+ {
+ "epoch": 4.86,
+ "learning_rate": "1.0024e-04",
+ "loss": 0.5511,
+ "slid_loss": 0.5093,
+ "step": 5058,
+ "time": 12.93
+ },
+ {
+ "epoch": 4.86,
+ "learning_rate": "1.0024e-04",
+ "loss": 0.5867,
+ "slid_loss": 0.5099,
+ "step": 5059,
+ "time": 13.82
+ },
+ {
+ "epoch": 4.86,
+ "learning_rate": "1.0024e-04",
+ "loss": 0.4963,
+ "slid_loss": 0.5101,
+ "step": 5060,
+ "time": 11.52
+ },
+ {
+ "epoch": 4.86,
+ "learning_rate": "1.0023e-04",
+ "loss": 0.5654,
+ "slid_loss": 0.51,
+ "step": 5061,
+ "time": 13.55
+ },
+ {
+ "epoch": 4.86,
+ "learning_rate": "1.0023e-04",
+ "loss": 0.5072,
+ "slid_loss": 0.5103,
+ "step": 5062,
+ "time": 13.13
+ },
+ {
+ "epoch": 4.86,
+ "learning_rate": "1.0023e-04",
+ "loss": 0.508,
+ "slid_loss": 0.5109,
+ "step": 5063,
+ "time": 12.91
+ },
+ {
+ "epoch": 4.86,
+ "learning_rate": "1.0022e-04",
+ "loss": 0.4966,
+ "slid_loss": 0.51,
+ "step": 5064,
+ "time": 12.93
+ },
+ {
+ "epoch": 4.87,
+ "learning_rate": "1.0022e-04",
+ "loss": 0.5723,
+ "slid_loss": 0.5103,
+ "step": 5065,
+ "time": 13.53
+ },
+ {
+ "epoch": 4.87,
+ "learning_rate": "1.0022e-04",
+ "loss": 0.4937,
+ "slid_loss": 0.5096,
+ "step": 5066,
+ "time": 12.23
+ },
+ {
+ "epoch": 4.87,
+ "learning_rate": "1.0021e-04",
+ "loss": 0.5286,
+ "slid_loss": 0.5103,
+ "step": 5067,
+ "time": 11.82
+ },
+ {
+ "epoch": 4.87,
+ "learning_rate": "1.0021e-04",
+ "loss": 0.496,
+ "slid_loss": 0.5105,
+ "step": 5068,
+ "time": 12.93
+ },
+ {
+ "epoch": 4.87,
+ "learning_rate": "1.0021e-04",
+ "loss": 0.6164,
+ "slid_loss": 0.5109,
+ "step": 5069,
+ "time": 14.18
+ },
+ {
+ "epoch": 4.87,
+ "learning_rate": "1.0020e-04",
+ "loss": 0.4751,
+ "slid_loss": 0.5106,
+ "step": 5070,
+ "time": 12.47
+ },
+ {
+ "epoch": 4.87,
+ "learning_rate": "1.0020e-04",
+ "loss": 0.4714,
+ "slid_loss": 0.5108,
+ "step": 5071,
+ "time": 11.37
+ },
+ {
+ "epoch": 4.87,
+ "learning_rate": "1.0020e-04",
+ "loss": 0.5086,
+ "slid_loss": 0.5108,
+ "step": 5072,
+ "time": 13.06
+ },
+ {
+ "epoch": 4.87,
+ "learning_rate": "1.0020e-04",
+ "loss": 0.5588,
+ "slid_loss": 0.5114,
+ "step": 5073,
+ "time": 13.89
+ },
+ {
+ "epoch": 4.87,
+ "learning_rate": "1.0019e-04",
+ "loss": 0.4988,
+ "slid_loss": 0.511,
+ "step": 5074,
+ "time": 13.58
+ },
+ {
+ "epoch": 4.88,
+ "learning_rate": "1.0019e-04",
+ "loss": 0.5026,
+ "slid_loss": 0.5103,
+ "step": 5075,
+ "time": 11.67
+ },
+ {
+ "epoch": 4.88,
+ "learning_rate": "1.0019e-04",
+ "loss": 0.5332,
+ "slid_loss": 0.5102,
+ "step": 5076,
+ "time": 13.48
+ },
+ {
+ "epoch": 4.88,
+ "learning_rate": "1.0018e-04",
+ "loss": 0.5369,
+ "slid_loss": 0.5105,
+ "step": 5077,
+ "time": 12.31
+ },
+ {
+ "epoch": 4.88,
+ "learning_rate": "1.0018e-04",
+ "loss": 0.5436,
+ "slid_loss": 0.5113,
+ "step": 5078,
+ "time": 13.66
+ },
+ {
+ "epoch": 4.88,
+ "learning_rate": "1.0018e-04",
+ "loss": 0.4862,
+ "slid_loss": 0.5113,
+ "step": 5079,
+ "time": 13.6
+ },
+ {
+ "epoch": 4.88,
+ "learning_rate": "1.0018e-04",
+ "loss": 0.4378,
+ "slid_loss": 0.5101,
+ "step": 5080,
+ "time": 11.28
+ },
+ {
+ "epoch": 4.88,
+ "learning_rate": "1.0017e-04",
+ "loss": 0.5362,
+ "slid_loss": 0.5106,
+ "step": 5081,
+ "time": 13.74
+ },
+ {
+ "epoch": 4.88,
+ "learning_rate": "1.0017e-04",
+ "loss": 0.5019,
+ "slid_loss": 0.5106,
+ "step": 5082,
+ "time": 11.54
+ },
+ {
+ "epoch": 4.88,
+ "learning_rate": "1.0017e-04",
+ "loss": 0.5193,
+ "slid_loss": 0.5107,
+ "step": 5083,
+ "time": 14.52
+ },
+ {
+ "epoch": 4.88,
+ "learning_rate": "1.0016e-04",
+ "loss": 0.5521,
+ "slid_loss": 0.5109,
+ "step": 5084,
+ "time": 11.84
+ },
+ {
+ "epoch": 4.88,
+ "learning_rate": "1.0016e-04",
+ "loss": 0.5416,
+ "slid_loss": 0.5107,
+ "step": 5085,
+ "time": 13.52
+ },
+ {
+ "epoch": 4.89,
+ "learning_rate": "1.0016e-04",
+ "loss": 0.4548,
+ "slid_loss": 0.5101,
+ "step": 5086,
+ "time": 13.86
+ },
+ {
+ "epoch": 4.89,
+ "learning_rate": "1.0016e-04",
+ "loss": 0.5053,
+ "slid_loss": 0.5107,
+ "step": 5087,
+ "time": 11.37
+ },
+ {
+ "epoch": 4.89,
+ "learning_rate": "1.0015e-04",
+ "loss": 0.5044,
+ "slid_loss": 0.5112,
+ "step": 5088,
+ "time": 13.49
+ },
+ {
+ "epoch": 4.89,
+ "learning_rate": "1.0015e-04",
+ "loss": 0.5102,
+ "slid_loss": 0.5112,
+ "step": 5089,
+ "time": 13.39
+ },
+ {
+ "epoch": 4.89,
+ "learning_rate": "1.0015e-04",
+ "loss": 0.484,
+ "slid_loss": 0.5118,
+ "step": 5090,
+ "time": 12.74
+ },
+ {
+ "epoch": 4.89,
+ "learning_rate": "1.0015e-04",
+ "loss": 0.4358,
+ "slid_loss": 0.5121,
+ "step": 5091,
+ "time": 11.93
+ },
+ {
+ "epoch": 4.89,
+ "learning_rate": "1.0014e-04",
+ "loss": 0.4838,
+ "slid_loss": 0.5119,
+ "step": 5092,
+ "time": 13.43
+ },
+ {
+ "epoch": 4.89,
+ "learning_rate": "1.0014e-04",
+ "loss": 0.5536,
+ "slid_loss": 0.5115,
+ "step": 5093,
+ "time": 13.14
+ },
+ {
+ "epoch": 4.89,
+ "learning_rate": "1.0014e-04",
+ "loss": 0.5303,
+ "slid_loss": 0.5111,
+ "step": 5094,
+ "time": 13.42
+ },
+ {
+ "epoch": 4.89,
+ "learning_rate": "1.0014e-04",
+ "loss": 0.4498,
+ "slid_loss": 0.5107,
+ "step": 5095,
+ "time": 13.38
+ },
+ {
+ "epoch": 4.9,
+ "learning_rate": "1.0013e-04",
+ "loss": 0.5614,
+ "slid_loss": 0.5109,
+ "step": 5096,
+ "time": 11.33
+ },
+ {
+ "epoch": 4.9,
+ "learning_rate": "1.0013e-04",
+ "loss": 0.5445,
+ "slid_loss": 0.5124,
+ "step": 5097,
+ "time": 13.93
+ },
+ {
+ "epoch": 4.9,
+ "learning_rate": "1.0013e-04",
+ "loss": 0.4069,
+ "slid_loss": 0.5115,
+ "step": 5098,
+ "time": 14.06
+ },
+ {
+ "epoch": 4.9,
+ "learning_rate": "1.0013e-04",
+ "loss": 0.5411,
+ "slid_loss": 0.5117,
+ "step": 5099,
+ "time": 11.34
+ },
+ {
+ "epoch": 4.9,
+ "learning_rate": "1.0012e-04",
+ "loss": 0.4739,
+ "slid_loss": 0.5117,
+ "step": 5100,
+ "time": 11.55
+ },
+ {
+ "epoch": 4.9,
+ "learning_rate": "1.0012e-04",
+ "loss": 0.5741,
+ "slid_loss": 0.5122,
+ "step": 5101,
+ "time": 11.66
+ },
+ {
+ "epoch": 4.9,
+ "learning_rate": "1.0012e-04",
+ "loss": 0.502,
+ "slid_loss": 0.5126,
+ "step": 5102,
+ "time": 11.93
+ },
+ {
+ "epoch": 4.9,
+ "learning_rate": "1.0012e-04",
+ "loss": 0.5658,
+ "slid_loss": 0.5135,
+ "step": 5103,
+ "time": 13.21
+ },
+ {
+ "epoch": 4.9,
+ "learning_rate": "1.0011e-04",
+ "loss": 0.5497,
+ "slid_loss": 0.5136,
+ "step": 5104,
+ "time": 11.55
+ },
+ {
+ "epoch": 4.9,
+ "learning_rate": "1.0011e-04",
+ "loss": 0.4897,
+ "slid_loss": 0.5132,
+ "step": 5105,
+ "time": 11.56
+ },
+ {
+ "epoch": 4.9,
+ "learning_rate": "1.0011e-04",
+ "loss": 0.5316,
+ "slid_loss": 0.5138,
+ "step": 5106,
+ "time": 14.29
+ },
+ {
+ "epoch": 4.91,
+ "learning_rate": "1.0011e-04",
+ "loss": 0.5878,
+ "slid_loss": 0.5148,
+ "step": 5107,
+ "time": 11.22
+ },
+ {
+ "epoch": 4.91,
+ "learning_rate": "1.0011e-04",
+ "loss": 0.5206,
+ "slid_loss": 0.5146,
+ "step": 5108,
+ "time": 13.74
+ },
+ {
+ "epoch": 4.91,
+ "learning_rate": "1.0010e-04",
+ "loss": 0.4889,
+ "slid_loss": 0.5143,
+ "step": 5109,
+ "time": 13.45
+ },
+ {
+ "epoch": 4.91,
+ "learning_rate": "1.0010e-04",
+ "loss": 0.5671,
+ "slid_loss": 0.5141,
+ "step": 5110,
+ "time": 13.94
+ },
+ {
+ "epoch": 4.91,
+ "learning_rate": "1.0010e-04",
+ "loss": 0.5685,
+ "slid_loss": 0.515,
+ "step": 5111,
+ "time": 13.84
+ },
+ {
+ "epoch": 4.91,
+ "learning_rate": "1.0010e-04",
+ "loss": 0.5259,
+ "slid_loss": 0.514,
+ "step": 5112,
+ "time": 13.7
+ },
+ {
+ "epoch": 4.91,
+ "learning_rate": "1.0010e-04",
+ "loss": 0.5225,
+ "slid_loss": 0.5143,
+ "step": 5113,
+ "time": 13.7
+ },
+ {
+ "epoch": 4.91,
+ "learning_rate": "1.0009e-04",
+ "loss": 0.4598,
+ "slid_loss": 0.5134,
+ "step": 5114,
+ "time": 11.98
+ },
+ {
+ "epoch": 4.91,
+ "learning_rate": "1.0009e-04",
+ "loss": 0.5688,
+ "slid_loss": 0.5134,
+ "step": 5115,
+ "time": 12.86
+ },
+ {
+ "epoch": 4.91,
+ "learning_rate": "1.0009e-04",
+ "loss": 0.499,
+ "slid_loss": 0.5138,
+ "step": 5116,
+ "time": 10.7
+ },
+ {
+ "epoch": 4.92,
+ "learning_rate": "1.0009e-04",
+ "loss": 0.511,
+ "slid_loss": 0.513,
+ "step": 5117,
+ "time": 13.47
+ },
+ {
+ "epoch": 4.92,
+ "learning_rate": "1.0009e-04",
+ "loss": 0.4455,
+ "slid_loss": 0.5117,
+ "step": 5118,
+ "time": 13.65
+ },
+ {
+ "epoch": 4.92,
+ "learning_rate": "1.0008e-04",
+ "loss": 0.4618,
+ "slid_loss": 0.5109,
+ "step": 5119,
+ "time": 13.33
+ },
+ {
+ "epoch": 4.92,
+ "learning_rate": "1.0008e-04",
+ "loss": 0.5372,
+ "slid_loss": 0.5113,
+ "step": 5120,
+ "time": 11.99
+ },
+ {
+ "epoch": 4.92,
+ "learning_rate": "1.0008e-04",
+ "loss": 0.5449,
+ "slid_loss": 0.5118,
+ "step": 5121,
+ "time": 13.57
+ },
+ {
+ "epoch": 4.92,
+ "learning_rate": "1.0008e-04",
+ "loss": 0.6312,
+ "slid_loss": 0.5132,
+ "step": 5122,
+ "time": 11.61
+ },
+ {
+ "epoch": 4.92,
+ "learning_rate": "1.0008e-04",
+ "loss": 0.5028,
+ "slid_loss": 0.5135,
+ "step": 5123,
+ "time": 13.14
+ },
+ {
+ "epoch": 4.92,
+ "learning_rate": "1.0007e-04",
+ "loss": 0.5731,
+ "slid_loss": 0.5138,
+ "step": 5124,
+ "time": 13.92
+ },
+ {
+ "epoch": 4.92,
+ "learning_rate": "1.0007e-04",
+ "loss": 0.5178,
+ "slid_loss": 0.5141,
+ "step": 5125,
+ "time": 12.87
+ },
+ {
+ "epoch": 4.92,
+ "learning_rate": "1.0007e-04",
+ "loss": 0.4737,
+ "slid_loss": 0.5138,
+ "step": 5126,
+ "time": 13.75
+ },
+ {
+ "epoch": 4.93,
+ "learning_rate": "1.0007e-04",
+ "loss": 0.5634,
+ "slid_loss": 0.5143,
+ "step": 5127,
+ "time": 12.68
+ },
+ {
+ "epoch": 4.93,
+ "learning_rate": "1.0007e-04",
+ "loss": 0.5468,
+ "slid_loss": 0.5151,
+ "step": 5128,
+ "time": 12.88
+ },
+ {
+ "epoch": 4.93,
+ "learning_rate": "1.0006e-04",
+ "loss": 0.5456,
+ "slid_loss": 0.5152,
+ "step": 5129,
+ "time": 13.32
+ },
+ {
+ "epoch": 4.93,
+ "learning_rate": "1.0006e-04",
+ "loss": 0.4488,
+ "slid_loss": 0.5149,
+ "step": 5130,
+ "time": 11.68
+ },
+ {
+ "epoch": 4.93,
+ "learning_rate": "1.0006e-04",
+ "loss": 0.5207,
+ "slid_loss": 0.5153,
+ "step": 5131,
+ "time": 12.61
+ },
+ {
+ "epoch": 4.93,
+ "learning_rate": "1.0006e-04",
+ "loss": 0.4721,
+ "slid_loss": 0.5147,
+ "step": 5132,
+ "time": 13.26
+ },
+ {
+ "epoch": 4.93,
+ "learning_rate": "1.0006e-04",
+ "loss": 0.5193,
+ "slid_loss": 0.5148,
+ "step": 5133,
+ "time": 11.12
+ },
+ {
+ "epoch": 4.93,
+ "learning_rate": "1.0006e-04",
+ "loss": 0.4524,
+ "slid_loss": 0.5139,
+ "step": 5134,
+ "time": 13.6
+ },
+ {
+ "epoch": 4.93,
+ "learning_rate": "1.0006e-04",
+ "loss": 0.5952,
+ "slid_loss": 0.5151,
+ "step": 5135,
+ "time": 13.93
+ },
+ {
+ "epoch": 4.93,
+ "learning_rate": "1.0005e-04",
+ "loss": 0.5182,
+ "slid_loss": 0.5151,
+ "step": 5136,
+ "time": 13.63
+ },
+ {
+ "epoch": 4.93,
+ "learning_rate": "1.0005e-04",
+ "loss": 0.4437,
+ "slid_loss": 0.5153,
+ "step": 5137,
+ "time": 12.24
+ },
+ {
+ "epoch": 4.94,
+ "learning_rate": "1.0005e-04",
+ "loss": 0.5045,
+ "slid_loss": 0.5155,
+ "step": 5138,
+ "time": 13.32
+ },
+ {
+ "epoch": 4.94,
+ "learning_rate": "1.0005e-04",
+ "loss": 0.5059,
+ "slid_loss": 0.5154,
+ "step": 5139,
+ "time": 12.9
+ },
+ {
+ "epoch": 4.94,
+ "learning_rate": "1.0005e-04",
+ "loss": 0.4416,
+ "slid_loss": 0.5148,
+ "step": 5140,
+ "time": 14.01
+ },
+ {
+ "epoch": 4.94,
+ "learning_rate": "1.0005e-04",
+ "loss": 0.5812,
+ "slid_loss": 0.5159,
+ "step": 5141,
+ "time": 13.14
+ },
+ {
+ "epoch": 4.94,
+ "learning_rate": "1.0004e-04",
+ "loss": 0.52,
+ "slid_loss": 0.517,
+ "step": 5142,
+ "time": 12.38
+ },
+ {
+ "epoch": 4.94,
+ "learning_rate": "1.0004e-04",
+ "loss": 0.4483,
+ "slid_loss": 0.5171,
+ "step": 5143,
+ "time": 13.25
+ },
+ {
+ "epoch": 4.94,
+ "learning_rate": "1.0004e-04",
+ "loss": 0.4625,
+ "slid_loss": 0.517,
+ "step": 5144,
+ "time": 13.64
+ },
+ {
+ "epoch": 4.94,
+ "learning_rate": "1.0004e-04",
+ "loss": 0.4795,
+ "slid_loss": 0.5158,
+ "step": 5145,
+ "time": 13.73
+ },
+ {
+ "epoch": 4.94,
+ "learning_rate": "1.0004e-04",
+ "loss": 0.5096,
+ "slid_loss": 0.5162,
+ "step": 5146,
+ "time": 11.42
+ },
+ {
+ "epoch": 4.94,
+ "learning_rate": "1.0004e-04",
+ "loss": 0.4762,
+ "slid_loss": 0.5149,
+ "step": 5147,
+ "time": 12.74
+ },
+ {
+ "epoch": 4.95,
+ "learning_rate": "1.0004e-04",
+ "loss": 0.4907,
+ "slid_loss": 0.5146,
+ "step": 5148,
+ "time": 13.29
+ },
+ {
+ "epoch": 4.95,
+ "learning_rate": "1.0004e-04",
+ "loss": 0.494,
+ "slid_loss": 0.5147,
+ "step": 5149,
+ "time": 14.12
+ },
+ {
+ "epoch": 4.95,
+ "learning_rate": "1.0003e-04",
+ "loss": 0.4353,
+ "slid_loss": 0.5134,
+ "step": 5150,
+ "time": 12.91
+ },
+ {
+ "epoch": 4.95,
+ "learning_rate": "1.0003e-04",
+ "loss": 0.534,
+ "slid_loss": 0.5142,
+ "step": 5151,
+ "time": 13.44
+ },
+ {
+ "epoch": 4.95,
+ "learning_rate": "1.0003e-04",
+ "loss": 0.4997,
+ "slid_loss": 0.514,
+ "step": 5152,
+ "time": 12.17
+ },
+ {
+ "epoch": 4.95,
+ "learning_rate": "1.0003e-04",
+ "loss": 0.4979,
+ "slid_loss": 0.5137,
+ "step": 5153,
+ "time": 13.66
+ },
+ {
+ "epoch": 4.95,
+ "learning_rate": "1.0003e-04",
+ "loss": 0.5482,
+ "slid_loss": 0.5138,
+ "step": 5154,
+ "time": 13.75
+ },
+ {
+ "epoch": 4.95,
+ "learning_rate": "1.0003e-04",
+ "loss": 0.5279,
+ "slid_loss": 0.5138,
+ "step": 5155,
+ "time": 12.15
+ },
+ {
+ "epoch": 4.95,
+ "learning_rate": "1.0003e-04",
+ "loss": 0.4653,
+ "slid_loss": 0.5133,
+ "step": 5156,
+ "time": 12.8
+ },
+ {
+ "epoch": 4.95,
+ "learning_rate": "1.0003e-04",
+ "loss": 0.4983,
+ "slid_loss": 0.5133,
+ "step": 5157,
+ "time": 12.96
+ },
+ {
+ "epoch": 4.95,
+ "learning_rate": "1.0002e-04",
+ "loss": 0.3868,
+ "slid_loss": 0.5116,
+ "step": 5158,
+ "time": 12.0
+ },
+ {
+ "epoch": 4.96,
+ "learning_rate": "1.0002e-04",
+ "loss": 0.5163,
+ "slid_loss": 0.5109,
+ "step": 5159,
+ "time": 13.55
+ },
+ {
+ "epoch": 4.96,
+ "learning_rate": "1.0002e-04",
+ "loss": 0.4181,
+ "slid_loss": 0.5102,
+ "step": 5160,
+ "time": 11.98
+ },
+ {
+ "epoch": 4.96,
+ "learning_rate": "1.0002e-04",
+ "loss": 0.3912,
+ "slid_loss": 0.5084,
+ "step": 5161,
+ "time": 12.9
+ },
+ {
+ "epoch": 4.96,
+ "learning_rate": "1.0002e-04",
+ "loss": 0.4785,
+ "slid_loss": 0.5081,
+ "step": 5162,
+ "time": 13.73
+ },
+ {
+ "epoch": 4.96,
+ "learning_rate": "1.0002e-04",
+ "loss": 0.5481,
+ "slid_loss": 0.5085,
+ "step": 5163,
+ "time": 13.86
+ },
+ {
+ "epoch": 4.96,
+ "learning_rate": "1.0002e-04",
+ "loss": 0.4677,
+ "slid_loss": 0.5082,
+ "step": 5164,
+ "time": 13.44
+ },
+ {
+ "epoch": 4.96,
+ "learning_rate": "1.0002e-04",
+ "loss": 0.529,
+ "slid_loss": 0.5078,
+ "step": 5165,
+ "time": 13.12
+ },
+ {
+ "epoch": 4.96,
+ "learning_rate": "1.0002e-04",
+ "loss": 0.536,
+ "slid_loss": 0.5082,
+ "step": 5166,
+ "time": 13.66
+ },
+ {
+ "epoch": 4.96,
+ "learning_rate": "1.0002e-04",
+ "loss": 0.5129,
+ "slid_loss": 0.5081,
+ "step": 5167,
+ "time": 11.2
+ },
+ {
+ "epoch": 4.96,
+ "learning_rate": "1.0002e-04",
+ "loss": 0.531,
+ "slid_loss": 0.5084,
+ "step": 5168,
+ "time": 12.3
+ },
+ {
+ "epoch": 4.97,
+ "learning_rate": "1.0001e-04",
+ "loss": 0.4764,
+ "slid_loss": 0.507,
+ "step": 5169,
+ "time": 13.5
+ },
+ {
+ "epoch": 4.97,
+ "learning_rate": "1.0001e-04",
+ "loss": 0.5513,
+ "slid_loss": 0.5078,
+ "step": 5170,
+ "time": 11.95
+ },
+ {
+ "epoch": 4.97,
+ "learning_rate": "1.0001e-04",
+ "loss": 0.4493,
+ "slid_loss": 0.5076,
+ "step": 5171,
+ "time": 13.49
+ },
+ {
+ "epoch": 4.97,
+ "learning_rate": "1.0001e-04",
+ "loss": 0.5211,
+ "slid_loss": 0.5077,
+ "step": 5172,
+ "time": 13.61
+ },
+ {
+ "epoch": 4.97,
+ "learning_rate": "1.0001e-04",
+ "loss": 0.4601,
+ "slid_loss": 0.5067,
+ "step": 5173,
+ "time": 13.38
+ },
+ {
+ "epoch": 4.97,
+ "learning_rate": "1.0001e-04",
+ "loss": 0.5012,
+ "slid_loss": 0.5067,
+ "step": 5174,
+ "time": 14.01
+ },
+ {
+ "epoch": 4.97,
+ "learning_rate": "1.0001e-04",
+ "loss": 0.5893,
+ "slid_loss": 0.5076,
+ "step": 5175,
+ "time": 12.05
+ },
+ {
+ "epoch": 4.97,
+ "learning_rate": "1.0001e-04",
+ "loss": 0.5335,
+ "slid_loss": 0.5076,
+ "step": 5176,
+ "time": 13.6
+ },
+ {
+ "epoch": 4.97,
+ "learning_rate": "1.0001e-04",
+ "loss": 0.5616,
+ "slid_loss": 0.5079,
+ "step": 5177,
+ "time": 11.15
+ },
+ {
+ "epoch": 4.97,
+ "learning_rate": "1.0001e-04",
+ "loss": 0.4496,
+ "slid_loss": 0.5069,
+ "step": 5178,
+ "time": 11.32
+ },
+ {
+ "epoch": 4.98,
+ "learning_rate": "1.0001e-04",
+ "loss": 0.529,
+ "slid_loss": 0.5073,
+ "step": 5179,
+ "time": 12.34
+ },
+ {
+ "epoch": 4.98,
+ "learning_rate": "1.0001e-04",
+ "loss": 0.4713,
+ "slid_loss": 0.5077,
+ "step": 5180,
+ "time": 12.58
+ },
+ {
+ "epoch": 4.98,
+ "learning_rate": "1.0001e-04",
+ "loss": 0.4871,
+ "slid_loss": 0.5072,
+ "step": 5181,
+ "time": 13.46
+ },
+ {
+ "epoch": 4.98,
+ "learning_rate": "1.0001e-04",
+ "loss": 0.502,
+ "slid_loss": 0.5072,
+ "step": 5182,
+ "time": 11.73
+ },
+ {
+ "epoch": 4.98,
+ "learning_rate": "1.0001e-04",
+ "loss": 0.4633,
+ "slid_loss": 0.5066,
+ "step": 5183,
+ "time": 11.44
+ },
+ {
+ "epoch": 4.98,
+ "learning_rate": "1.0000e-04",
+ "loss": 0.4947,
+ "slid_loss": 0.5061,
+ "step": 5184,
+ "time": 13.24
+ },
+ {
+ "epoch": 4.98,
+ "learning_rate": "1.0000e-04",
+ "loss": 0.4861,
+ "slid_loss": 0.5055,
+ "step": 5185,
+ "time": 14.09
+ },
+ {
+ "epoch": 4.98,
+ "learning_rate": "1.0000e-04",
+ "loss": 0.4987,
+ "slid_loss": 0.5059,
+ "step": 5186,
+ "time": 14.07
+ },
+ {
+ "epoch": 4.98,
+ "learning_rate": "1.0000e-04",
+ "loss": 0.5013,
+ "slid_loss": 0.5059,
+ "step": 5187,
+ "time": 13.59
+ },
+ {
+ "epoch": 4.98,
+ "learning_rate": "1.0000e-04",
+ "loss": 0.5147,
+ "slid_loss": 0.506,
+ "step": 5188,
+ "time": 14.3
+ },
+ {
+ "epoch": 4.98,
+ "learning_rate": "1.0000e-04",
+ "loss": 0.5712,
+ "slid_loss": 0.5066,
+ "step": 5189,
+ "time": 12.85
+ },
+ {
+ "epoch": 4.99,
+ "learning_rate": "1.0000e-04",
+ "loss": 0.4385,
+ "slid_loss": 0.5062,
+ "step": 5190,
+ "time": 13.0
+ },
+ {
+ "epoch": 4.99,
+ "learning_rate": "1.0000e-04",
+ "loss": 0.4915,
+ "slid_loss": 0.5067,
+ "step": 5191,
+ "time": 11.89
+ },
+ {
+ "epoch": 4.99,
+ "learning_rate": "1.0000e-04",
+ "loss": 0.4228,
+ "slid_loss": 0.5061,
+ "step": 5192,
+ "time": 11.42
+ },
+ {
+ "epoch": 4.99,
+ "learning_rate": "1.0000e-04",
+ "loss": 0.5071,
+ "slid_loss": 0.5056,
+ "step": 5193,
+ "time": 11.68
+ },
+ {
+ "epoch": 4.99,
+ "learning_rate": "1.0000e-04",
+ "loss": 0.5088,
+ "slid_loss": 0.5054,
+ "step": 5194,
+ "time": 13.17
+ },
+ {
+ "epoch": 4.99,
+ "learning_rate": "1.0000e-04",
+ "loss": 0.4227,
+ "slid_loss": 0.5052,
+ "step": 5195,
+ "time": 13.42
+ },
+ {
+ "epoch": 4.99,
+ "learning_rate": "1.0000e-04",
+ "loss": 0.5399,
+ "slid_loss": 0.5049,
+ "step": 5196,
+ "time": 14.41
+ },
+ {
+ "epoch": 4.99,
+ "learning_rate": "1.0000e-04",
+ "loss": 0.4837,
+ "slid_loss": 0.5043,
+ "step": 5197,
+ "time": 14.53
+ },
+ {
+ "epoch": 4.99,
+ "learning_rate": "1.0000e-04",
+ "loss": 0.4845,
+ "slid_loss": 0.5051,
+ "step": 5198,
+ "time": 13.23
+ },
+ {
+ "epoch": 4.99,
+ "learning_rate": "1.0000e-04",
+ "loss": 0.4553,
+ "slid_loss": 0.5042,
+ "step": 5199,
+ "time": 13.7
+ },
+ {
+ "epoch": 5.0,
+ "learning_rate": "1.0000e-04",
+ "loss": 0.5611,
+ "slid_loss": 0.5051,
+ "step": 5200,
+ "time": 13.83
+ },
+ {
+ "epoch": 5.0,
+ "learning_rate": "1.0000e-04",
+ "loss": 0.5229,
+ "slid_loss": 0.5046,
+ "step": 5201,
+ "time": 11.92
+ },
+ {
+ "epoch": 5.0,
+ "learning_rate": "1.0000e-04",
+ "loss": 0.4734,
+ "slid_loss": 0.5043,
+ "step": 5202,
+ "time": 13.74
+ },
+ {
+ "epoch": 5.0,
+ "learning_rate": "1.0000e-04",
+ "loss": 0.5101,
+ "slid_loss": 0.5038,
+ "step": 5203,
+ "time": 12.1
+ },
+ {
+ "epoch": 5.0,
+ "learning_rate": "1.0000e-04",
+ "loss": 0.5,
+ "slid_loss": 0.5033,
+ "step": 5204,
+ "time": 13.38
+ },
+ {
+ "epoch": 5.0,
+ "learning_rate": "1.0000e-04",
+ "loss": 0.5093,
+ "slid_loss": 0.5035,
+ "step": 5205,
+ "time": 12.95
+ }
+ ],
+ "logging_steps": 1.0,
+ "max_steps": 5205,
+ "num_train_epochs": 5,
+ "save_steps": 50000.0,
+ "total_flos": 0.0,
+ "trial_name": null,
+ "trial_params": null
+}
diff --git a/training_args.bin b/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..ef7fc47d262b3915853100f2393e7e8b78eee115
--- /dev/null
+++ b/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:16477a3093bec705d5c953a42339f69d1b8ddd7d862360831e7649065bce7c5d
+size 6075
diff --git a/zero_to_fp32.py b/zero_to_fp32.py
new file mode 100644
index 0000000000000000000000000000000000000000..c98caae31534368be22b67fc4ae906836c992a8d
--- /dev/null
+++ b/zero_to_fp32.py
@@ -0,0 +1,587 @@
+#!/usr/bin/env python
+
+# Copyright (c) Microsoft Corporation.
+# SPDX-License-Identifier: Apache-2.0
+
+# DeepSpeed Team
+
+# This script extracts fp32 consolidated weights from a zero 1, 2 and 3 DeepSpeed checkpoints. It gets
+# copied into the top level checkpoint dir, so the user can easily do the conversion at any point in
+# the future. Once extracted, the weights don't require DeepSpeed and can be used in any
+# application.
+#
+# example: python zero_to_fp32.py . pytorch_model.bin
+
+import argparse
+import torch
+import glob
+import math
+import os
+import re
+from collections import OrderedDict
+from dataclasses import dataclass
+
+# while this script doesn't use deepspeed to recover data, since the checkpoints are pickled with
+# DeepSpeed data structures it has to be available in the current python environment.
+from deepspeed.utils import logger
+from deepspeed.checkpoint.constants import (DS_VERSION, OPTIMIZER_STATE_DICT, SINGLE_PARTITION_OF_FP32_GROUPS,
+ FP32_FLAT_GROUPS, ZERO_STAGE, PARTITION_COUNT, PARAM_SHAPES, BUFFER_NAMES,
+ FROZEN_PARAM_SHAPES, FROZEN_PARAM_FRAGMENTS)
+
+
@dataclass
class zero_model_state:
    """Contents of one ``*_model_states.pt`` file, as collected by ``parse_model_states``."""
    # buffer name -> tensor, converted to fp32 while parsing
    buffers: dict
    # sequence of ``{param name: shape}`` mappings; consumers iterate it and zip it with the
    # fp32 param groups, so it is a list of dicts rather than a single dict
    param_shapes: list
    # ``[name, source name]`` pairs; ``name`` is given the tensor restored for ``source name``
    shared_params: list
    # value stored under the DS_VERSION key, None when the checkpoint has no such entry
    # NOTE(review): the ``int`` annotation is inherited from the original - confirm the stored type
    ds_version: int
    # frozen param name -> shape; None when the checkpoint has no frozen-param entry
    frozen_param_shapes: dict
    # frozen param name -> tensor held by this file; None when the checkpoint has no such entry
    frozen_param_fragments: dict
+
+
# Verbosity switch consulted by the helpers in this file; the command-line entry point
# replaces it with the value of --debug.
debug = 0

# load to cpu: used as ``map_location`` for every ``torch.load`` call in this file
device = torch.device("cpu")
+
+
def atoi(text):
    """Return ``text`` converted to ``int`` when it consists only of digits, else ``text`` unchanged."""
    if text.isdigit():
        return int(text)
    return text
+
+
def natural_keys(text):
    '''
    Sort key producing human ("natural") order, e.g. alist.sort(key=natural_keys)
    http://nedbatchelder.com/blog/200712/human_sorting.html
    (See Toothy's implementation in the comments)

    The text is split around its digit runs and every piece goes through ``atoi``, so digit
    runs become integers and the remaining pieces stay strings.
    '''
    pieces = re.split(r'(\d+)', text)
    return list(map(atoi, pieces))
+
+
def get_model_state_file(checkpoint_dir, zero_stage):
    """
    Return the path of the rank-0 model states file inside ``checkpoint_dir``.

    Args:
        - ``checkpoint_dir``: path to the folder holding the ``*_model_states.pt`` files
        - ``zero_stage``: ZeRO stage of the checkpoint; stages up to 2 and stage 3 use different file names

    Returns:
        - path of the model states file

    Raises:
        - ``FileNotFoundError``: ``checkpoint_dir`` is not a directory or the expected file is missing
        - ``ValueError``: ``zero_stage`` is not a supported stage
    """
    if not os.path.isdir(checkpoint_dir):
        raise FileNotFoundError(f"Directory '{checkpoint_dir}' doesn't exist")

    # there should be only one file
    if zero_stage <= 2:
        file = os.path.join(checkpoint_dir, "mp_rank_00_model_states.pt")
    elif zero_stage == 3:
        file = os.path.join(checkpoint_dir, "zero_pp_rank_0_mp_rank_00_model_states.pt")
    else:
        # without this branch ``file`` stays unbound and the check below dies with UnboundLocalError
        raise ValueError(f"unknown zero stage {zero_stage}")

    if not os.path.exists(file):
        raise FileNotFoundError(f"can't find model states file at '{file}'")

    return file
+
+
def get_checkpoint_files(checkpoint_dir, glob_pattern):
    """
    Return the files in ``checkpoint_dir`` matching ``glob_pattern``, in natural sort order.

    Raises:
        - ``FileNotFoundError``: nothing matches the pattern
    """
    # XXX: need to test that this simple glob rule works for multi-node setup too
    matches = glob.glob(os.path.join(checkpoint_dir, glob_pattern))
    matches.sort(key=natural_keys)

    if not matches:
        raise FileNotFoundError(f"can't find {glob_pattern} files in directory '{checkpoint_dir}'")

    return matches
+
+
def get_optim_files(checkpoint_dir):
    """Return the ``*_optim_states.pt`` files found in ``checkpoint_dir`` (see ``get_checkpoint_files``)."""
    optim_pattern = "*_optim_states.pt"
    return get_checkpoint_files(checkpoint_dir, optim_pattern)
+
+
def get_model_state_files(checkpoint_dir):
    """Return the ``*_model_states.pt`` files found in ``checkpoint_dir`` (see ``get_checkpoint_files``)."""
    model_pattern = "*_model_states.pt"
    return get_checkpoint_files(checkpoint_dir, model_pattern)
+
+
def parse_model_states(files):
    """
    Load every model states file and keep only what is needed to rebuild the fp32 weights.

    Args:
        - ``files``: paths of the ``*_model_states.pt`` files

    Returns:
        - list with one ``zero_model_state`` per file, in the order of ``files``

    Raises:
        - ``ValueError``: a file does not contain the buffer names entry
    """
    zero_model_states = []
    for file in files:
        # NOTE(security): torch.load unpickles the file - only run this on checkpoints you trust
        state_dict = torch.load(file, map_location=device)

        if BUFFER_NAMES not in state_dict:
            raise ValueError(f"{file} is not a model state checkpoint")
        buffer_names = state_dict[BUFFER_NAMES]
        if debug:
            print("Found buffers:", buffer_names)

        # recover just the buffers while restoring them to fp32 if they were saved in fp16
        buffers = {k: v.float() for k, v in state_dict["module"].items() if k in buffer_names}
        param_shapes = state_dict[PARAM_SHAPES]

        # frozen parameters are optional in the checkpoint, hence the None default
        frozen_param_shapes = state_dict.get(FROZEN_PARAM_SHAPES, None)
        if debug and frozen_param_shapes is not None:
            print(f"Found frozen_param_shapes: {frozen_param_shapes}")

        # handle shared params
        shared_params = [[k, v] for k, v in state_dict["shared_params"].items()]

        ds_version = state_dict.get(DS_VERSION, None)

        frozen_param_fragments = state_dict.get(FROZEN_PARAM_FRAGMENTS, None)

        zero_model_states.append(
            zero_model_state(buffers=buffers,
                             param_shapes=param_shapes,
                             shared_params=shared_params,
                             ds_version=ds_version,
                             frozen_param_shapes=frozen_param_shapes,
                             frozen_param_fragments=frozen_param_fragments))

    return zero_model_states
+
+
def parse_optim_states(files, ds_checkpoint_dir):
    """
    Load the optimizer state files and pull out the fp32 master weights.

    Args:
        - ``files``: paths of the ``*_optim_states.pt`` files
        - ``ds_checkpoint_dir``: checkpoint folder, only used in the error message

    Returns:
        - tuple ``(zero_stage, world_size, fp32_flat_groups)`` with one ``fp32_flat_groups`` entry per file

    Raises:
        - ``ValueError``: the first file is not a zero checkpoint, the number of files differs from
          the recorded partition count, or the zero stage is unknown
    """
    total_files = len(files)
    state_dicts = []
    for optim_file in files:
        loaded = torch.load(optim_file, map_location=device)
        # immediately discard the potentially huge 2 optimizer states as we only care for fp32 master weights
        # and also handle the case where it was already removed by another helper script
        loaded["optimizer_state_dict"].pop("optimizer_state_dict", None)
        state_dicts.append(loaded)

    first_optim_state = state_dicts[0][OPTIMIZER_STATE_DICT]
    if ZERO_STAGE not in first_optim_state:
        raise ValueError(f"{files[0]} is not a zero checkpoint")
    zero_stage = first_optim_state[ZERO_STAGE]
    world_size = first_optim_state[PARTITION_COUNT]

    # For ZeRO-2 each param group can have different partition_count as data parallelism for expert
    # parameters can be different from data parallelism for non-expert parameters. So we can just
    # use the max of the partition_count to get the dp world_size.
    if type(world_size) is list:
        world_size = max(world_size)

    if world_size != total_files:
        raise ValueError(
            f"Expected {world_size} of '*_optim_states.pt' under '{ds_checkpoint_dir}' but found {total_files} files. "
            "Possibly due to an overwrite of an old checkpoint, or a checkpoint didn't get saved by one or more processes."
        )

    # the groups are named differently in each stage
    if zero_stage <= 2:
        fp32_flat_groups = [sd[OPTIMIZER_STATE_DICT][SINGLE_PARTITION_OF_FP32_GROUPS] for sd in state_dicts]
    elif zero_stage == 3:
        # if there is more than one param group, there will be multiple flattened tensors - one
        # flattened tensor per group - for simplicity merge them into a single tensor
        #
        # XXX: could make the script more memory efficient for when there are multiple groups - it
        # will require matching the sub-lists of param_shapes for each param group flattened tensor
        fp32_flat_groups = [torch.cat(sd[OPTIMIZER_STATE_DICT][FP32_FLAT_GROUPS], 0) for sd in state_dicts]
    else:
        raise ValueError(f"unknown zero stage {zero_stage}")

    return zero_stage, world_size, fp32_flat_groups
+
+
def _get_fp32_state_dict_from_zero_checkpoint(ds_checkpoint_dir):
    """
    Rebuild the fp32 state_dict stored in a ds checkpoint folder.

    Args:
        - ``ds_checkpoint_dir``: path to the deepspeed checkpoint folder (where the optimizer files are)

    Returns:
        - the reconstructed fp32 ``state_dict``

    """
    print(f"Processing zero checkpoint '{ds_checkpoint_dir}'")

    # optimizer files carry the fp32 weights, model files carry shapes, buffers and shared params
    zero_stage, world_size, fp32_flat_groups = parse_optim_states(get_optim_files(ds_checkpoint_dir),
                                                                  ds_checkpoint_dir)
    print(f"Detected checkpoint of type zero stage {zero_stage}, world_size: {world_size}")

    zero_model_states = parse_model_states(get_model_state_files(ds_checkpoint_dir))
    print(f'Parsing checkpoint created by deepspeed=={zero_model_states[0].ds_version}')

    if zero_stage <= 2:
        return _get_fp32_state_dict_from_zero2_checkpoint(world_size, fp32_flat_groups, zero_model_states)
    if zero_stage == 3:
        return _get_fp32_state_dict_from_zero3_checkpoint(world_size, fp32_flat_groups, zero_model_states)
    return None
+
+
def _zero2_merge_frozen_params(state_dict, zero_model_states):
    """
    Copy the frozen parameters of the first model state into ``state_dict`` (zero stage <= 2).

    Only ``zero_model_states[0]`` is read and each fragment is stored as is. Nothing happens when
    that model state has no frozen parameter shapes.
    """
    rank0_state = zero_model_states[0]
    if rank0_state.frozen_param_shapes is None or len(rank0_state.frozen_param_shapes) == 0:
        return

    frozen_param_shapes = rank0_state.frozen_param_shapes
    frozen_param_fragments = rank0_state.frozen_param_fragments

    if debug:
        num_elem = sum(s.numel() for s in frozen_param_shapes.values())
        print(f'rank 0: {FROZEN_PARAM_SHAPES}.numel = {num_elem}')

    wanted_params = len(frozen_param_shapes)
    wanted_numel = sum(s.numel() for s in frozen_param_shapes.values())
    avail_numel = sum(fragment.numel() for fragment in frozen_param_fragments.values())
    print(f'Frozen params: Have {avail_numel} numels to process.')
    print(f'Frozen params: Need {wanted_numel} numels in {wanted_params} params')

    total_params = 0
    total_numel = 0
    for name, shape in frozen_param_shapes.items():
        unpartitioned_numel = shape.numel()
        total_params += 1
        total_numel += unpartitioned_numel

        state_dict[name] = frozen_param_fragments[name]

        if debug:
            print(f"{name} full shape: {shape} unpartitioned numel {unpartitioned_numel} ")

    print(f"Reconstructed Frozen fp32 state dict with {total_params} params {total_numel} elements")
+
+
def _zero2_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states):
    """
    Rebuild the trainable parameters from the per-rank fp32 partitions (zero stage <= 2).

    For every param group the partitions of all ranks are concatenated into one flat vector, which
    is then sliced into the parameters of that group in the order given by ``param_shapes``.

    Raises:
        - ``ValueError``: after alignment, the consumed size of a group differs from its available size
    """
    param_shapes = zero_model_states[0].param_shapes

    # Reconstruction protocol:
    #
    # XXX: document this

    if debug:
        for i in range(world_size):
            for j in range(len(fp32_flat_groups[0])):
                print(f"{FP32_FLAT_GROUPS}[{i}][{j}].shape={fp32_flat_groups[i][j].shape}")

    # XXX: memory usage doubles here (zero2)
    num_param_groups = len(fp32_flat_groups[0])
    merged_groups = [
        torch.cat([rank_groups[group_idx] for rank_groups in fp32_flat_groups], 0)
        for group_idx in range(num_param_groups)
    ]

    if debug:
        avail_numel = sum(flat_vector.numel() for flat_vector in merged_groups)
        wanted_params = sum(len(shapes) for shapes in param_shapes)
        wanted_numel = sum(sum(shape.numel() for shape in shapes.values()) for shapes in param_shapes)
        # not asserting if there is a mismatch due to possible padding
        print(f"Have {avail_numel} numels to process.")
        print(f"Need {wanted_numel} numels in {wanted_params} params.")

    # Z2 started to align to 2*world_size to improve nccl performance. Therefore both offset and
    # avail_numel can differ by anywhere between 0..2*world_size. Due to two unrelated complex
    # paddings performed in the code it's almost impossible to predict the exact numbers w/o the
    # live optimizer object, so we are checking that the numbers are within the right range
    align_to = 2 * world_size

    def zero2_align(x):
        return align_to * math.ceil(x / align_to)

    # params
    # XXX: for huge models that can't fit into the host's RAM we will have to recode this to support
    # out-of-core computing solution
    total_numel = 0
    total_params = 0
    for shapes, flat_vector in zip(param_shapes, merged_groups):
        offset = 0
        avail_numel = flat_vector.numel()
        for name, shape in shapes.items():
            unpartitioned_numel = shape.numel()
            total_numel += unpartitioned_numel
            total_params += 1

            if debug:
                print(f"{name} full shape: {shape} unpartitioned numel {unpartitioned_numel} ")
            state_dict[name] = flat_vector.narrow(0, offset, unpartitioned_numel).view(shape)
            offset += unpartitioned_numel

        if debug:
            print(f"original offset={offset}, avail_numel={avail_numel}")

        offset = zero2_align(offset)
        avail_numel = zero2_align(avail_numel)

        if debug:
            print(f"aligned offset={offset}, avail_numel={avail_numel}")

        # Sanity check
        if offset != avail_numel:
            raise ValueError(f"consumed {offset} numels out of {avail_numel} - something is wrong")

    print(f"Reconstructed fp32 state dict with {total_params} params {total_numel} elements")
+
+
def _get_fp32_state_dict_from_zero2_checkpoint(world_size, fp32_flat_groups, zero_model_states):
    """
    Assemble the fp32 state_dict of a zero stage <= 2 checkpoint.

    Buffers go in first, then frozen params, then trainable params, and finally the shared
    params are pointed at the tensors restored for their source names.
    """
    rank0_state = zero_model_states[0]
    state_dict = OrderedDict()

    # buffers
    state_dict.update(rank0_state.buffers)
    if debug:
        print(f"added {len(rank0_state.buffers)} buffers")

    _zero2_merge_frozen_params(state_dict, zero_model_states)

    _zero2_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states)

    # recover shared parameters
    for shared_name, source_name in rank0_state.shared_params:
        if source_name in state_dict:
            state_dict[shared_name] = state_dict[source_name]

    return state_dict
+
+
def zero3_partitioned_param_info(unpartitioned_numel, world_size):
    """
    Return ``(partitioned_numel, padding_numel)`` for a param split across ``world_size`` partitions.

    ``partitioned_numel`` is the ceiling of ``unpartitioned_numel / world_size``; ``padding_numel``
    is the number of elements missing to make ``unpartitioned_numel`` a multiple of ``world_size``.
    """
    partitioned_numel = math.ceil(unpartitioned_numel / world_size)

    remainder = unpartitioned_numel % world_size
    if remainder:
        padding_numel = world_size - remainder
    else:
        padding_numel = 0

    return partitioned_numel, padding_numel
+
+
def _zero3_merge_frozen_params(state_dict, world_size, zero_model_states):
    """
    Rebuild the frozen parameters of a zero stage 3 checkpoint into ``state_dict``.

    The fragments of every model state are concatenated, cut to the unpartitioned size and viewed
    with the recorded shape. Nothing happens when the first model state has no frozen shapes.
    """
    rank0_state = zero_model_states[0]
    if rank0_state.frozen_param_shapes is None or len(rank0_state.frozen_param_shapes) == 0:
        return

    if debug:
        for rank in range(world_size):
            num_elem = sum(s.numel() for s in zero_model_states[rank].frozen_param_fragments.values())
            print(f'rank {rank}: {FROZEN_PARAM_SHAPES}.numel = {num_elem}')

    frozen_param_shapes = rank0_state.frozen_param_shapes
    wanted_params = len(frozen_param_shapes)
    wanted_numel = sum(s.numel() for s in frozen_param_shapes.values())
    rank0_numel = sum(fragment.numel() for fragment in rank0_state.frozen_param_fragments.values())
    avail_numel = rank0_numel * world_size
    print(f'Frozen params: Have {avail_numel} numels to process.')
    print(f'Frozen params: Need {wanted_numel} numels in {wanted_params} params')

    total_params = 0
    total_numel = 0
    for name, shape in frozen_param_shapes.items():
        unpartitioned_numel = shape.numel()
        total_params += 1
        total_numel += unpartitioned_numel

        # concatenate the fragments of all model states, then drop whatever exceeds the real size
        fragments = tuple(model_state.frozen_param_fragments[name] for model_state in zero_model_states)
        merged = torch.cat(fragments, 0)
        state_dict[name] = merged.narrow(0, 0, unpartitioned_numel).view(shape)

        partitioned_numel, partitioned_padding_numel = zero3_partitioned_param_info(unpartitioned_numel, world_size)

        if debug:
            print(
                f"Frozen params: {total_params} {name} full shape: {shape} partition0 numel={partitioned_numel} partitioned_padding_numel={partitioned_padding_numel}"
            )

    print(f"Reconstructed Frozen fp32 state dict with {total_params} params {total_numel} elements")
+
+
def _zero3_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states):
    """
    Rebuild the trainable parameters of a zero stage 3 checkpoint into ``state_dict``.

    Raises:
        - ``ValueError``: the consumed number of elements differs from the available one
    """
    grouped_shapes = zero_model_states[0].param_shapes
    avail_numel = fp32_flat_groups[0].numel() * world_size
    # Reconstruction protocol: For zero3 we need to zip the partitions together at boundary of each
    # param, re-consolidating each param, while dealing with padding if any

    # merge list of dicts, preserving order
    param_shapes = {}
    for group_shapes in grouped_shapes:
        param_shapes.update(group_shapes)

    if debug:
        for rank in range(world_size):
            print(f"{FP32_FLAT_GROUPS}[{rank}].shape={fp32_flat_groups[rank].shape}")

    wanted_params = len(param_shapes)
    wanted_numel = sum(shape.numel() for shape in param_shapes.values())
    # not asserting if there is a mismatch due to possible padding
    print(f"Trainable params: Have {avail_numel} numels to process.")
    print(f"Trainable params: Need {wanted_numel} numels in {wanted_params} params.")

    # params
    # XXX: for huge models that can't fit into the host's RAM we will have to recode this to support
    # out-of-core computing solution
    offset = 0
    total_numel = 0
    total_params = 0
    for name, shape in param_shapes.items():
        unpartitioned_numel = shape.numel()
        total_numel += unpartitioned_numel
        total_params += 1

        partitioned_numel, partitioned_padding_numel = zero3_partitioned_param_info(unpartitioned_numel, world_size)

        if debug:
            print(
                f"Trainable params: {total_params} {name} full shape: {shape} partition0 numel={partitioned_numel} partitioned_padding_numel={partitioned_padding_numel}"
            )

        # XXX: memory usage doubles here
        # take this param's slice from every rank, join the slices, then cut off the padding
        rank_slices = tuple(fp32_flat_groups[rank].narrow(0, offset, partitioned_numel) for rank in range(world_size))
        state_dict[name] = torch.cat(rank_slices, 0).narrow(0, 0, unpartitioned_numel).view(shape)
        offset += partitioned_numel

    # offset was tracked per rank, scale it to the total over all ranks before comparing
    offset *= world_size

    # Sanity check
    if offset != avail_numel:
        raise ValueError(f"consumed {offset} numels out of {avail_numel} - something is wrong")

    print(f"Reconstructed Trainable fp32 state dict with {total_params} params {total_numel} elements")
+
+
def _get_fp32_state_dict_from_zero3_checkpoint(world_size, fp32_flat_groups, zero_model_states):
    """
    Assemble the fp32 state_dict of a zero stage 3 checkpoint.

    Buffers go in first, then frozen params, then trainable params, and finally the shared
    params are pointed at the tensors restored for their source names.
    """
    rank0_state = zero_model_states[0]
    state_dict = OrderedDict()

    # buffers
    state_dict.update(rank0_state.buffers)
    if debug:
        print(f"added {len(rank0_state.buffers)} buffers")

    _zero3_merge_frozen_params(state_dict, world_size, zero_model_states)

    _zero3_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states)

    # recover shared parameters
    for shared_name, source_name in rank0_state.shared_params:
        if source_name in state_dict:
            state_dict[shared_name] = state_dict[source_name]

    return state_dict
+
+
def get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, tag=None):
    """
    Build a single consolidated fp32 state_dict from a ZeRO 2 or 3 checkpoint. The result can be
    loaded with ``load_state_dict()`` and used for training without DeepSpeed, or shared with
    others, for example via a model hub.

    Args:
        - ``checkpoint_dir``: path to the desired checkpoint folder
        - ``tag``: checkpoint tag used as a unique identifier for checkpoint. If not provided will attempt to load tag in 'latest' file. e.g., ``global_step14``

    Returns:
        - pytorch ``state_dict``

    Raises:
        - ``ValueError``: no ``tag`` was given and ``checkpoint_dir`` has no 'latest' file
        - ``FileNotFoundError``: the tag folder does not exist under ``checkpoint_dir``

    Note: this approach may not work if your application doesn't have sufficient free CPU memory and
    you may need to use the offline approach using the ``zero_to_fp32.py`` script that is saved with
    the checkpoint.

    A typical usage might be ::

        from deepspeed.utils.zero_to_fp32 import get_fp32_state_dict_from_zero_checkpoint
        # do the training and checkpoint saving
        state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir) # already on cpu
        model = model.cpu() # move to cpu
        model.load_state_dict(state_dict)
        # submit to model hub or save the model to share with others

    In this example the ``model`` will no longer be usable in the deepspeed context of the same
    application. i.e. you will need to re-initialize the deepspeed engine, since
    ``model.load_state_dict(state_dict)`` will remove all the deepspeed magic from it.

    If you want it all done for you, use ``load_state_dict_from_zero_checkpoint`` instead.

    """
    if tag is None:
        # fall back to the tag recorded in the 'latest' file of the checkpoint folder
        latest_path = os.path.join(checkpoint_dir, 'latest')
        if not os.path.isfile(latest_path):
            raise ValueError(f"Unable to find 'latest' file at {latest_path}")
        with open(latest_path, 'r') as fd:
            tag = fd.read().strip()

    ds_checkpoint_dir = os.path.join(checkpoint_dir, tag)
    if os.path.isdir(ds_checkpoint_dir):
        return _get_fp32_state_dict_from_zero_checkpoint(ds_checkpoint_dir)

    raise FileNotFoundError(f"Directory '{ds_checkpoint_dir}' doesn't exist")
+
+
def convert_zero_checkpoint_to_fp32_state_dict(checkpoint_dir, output_file, tag=None):
    """
    Consolidate a ZeRO 2 or 3 checkpoint into one fp32 ``state_dict`` and save it to a file that
    can be loaded with ``torch.load(file)`` + ``load_state_dict()`` and used for training without
    DeepSpeed.

    Args:
        - ``checkpoint_dir``: path to the desired checkpoint folder. (one that contains the tag-folder, like ``global_step14``)
        - ``output_file``: path to the pytorch fp32 state_dict output file (e.g. path/pytorch_model.bin)
        - ``tag``: checkpoint tag used as a unique identifier for checkpoint. If not provided will attempt to load tag in the file named ``latest`` in the checkpoint folder, e.g., ``global_step14``
    """
    fp32_state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, tag)

    print(f"Saving fp32 state dict to {output_file}")
    torch.save(fp32_state_dict, output_file)
+
+
def load_state_dict_from_zero_checkpoint(model, checkpoint_dir, tag=None):
    """
    1. Convert ZeRO 2 or 3 checkpoint into a single fp32 consolidated ``state_dict``
    2. Put the provided model to cpu
    3. Load the ``state_dict`` into the provided model (with ``strict=False``)

    Args:
        - ``model``: the model object to update
        - ``checkpoint_dir``: path to the desired checkpoint folder. (one that contains the tag-folder, like ``global_step14``)
        - ``tag``: checkpoint tag used as a unique identifier for checkpoint. If not provided will attempt to load tag in the file named ``latest`` in the checkpoint folder, e.g., ``global_step14``

    Returns:
        - ``model``: modified model

    Make sure you have plenty of CPU memory available before you call this function. If you don't
    have enough use the ``zero_to_fp32.py`` utility to do the conversion. You will find it
    conveniently placed for you in the checkpoint folder.

    A typical usage might be ::

        from deepspeed.utils.zero_to_fp32 import load_state_dict_from_zero_checkpoint
        model = load_state_dict_from_zero_checkpoint(trainer.model, checkpoint_dir)
        # submit to model hub or save the model to share with others

    Note, that once this was run, the ``model`` will no longer be usable in the deepspeed context
    of the same application. i.e. you will need to re-initialize the deepspeed engine, since
    ``model.load_state_dict(state_dict)`` will remove all the deepspeed magic from it.

    """
    logger.info("Extracting fp32 weights")
    fp32_state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, tag)

    logger.info("Overwriting model with fp32 weights")
    cpu_model = model.cpu()
    cpu_model.load_state_dict(fp32_state_dict, strict=False)

    return cpu_model
+
+
if __name__ == "__main__":
    # Command-line entry point: zero_to_fp32.py <checkpoint_dir> <output_file> [-t TAG] [-d]
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "checkpoint_dir",
        type=str,
        help="path to the desired checkpoint folder, e.g., path/checkpoint-12",
    )
    parser.add_argument(
        "output_file",
        type=str,
        help="path to the pytorch fp32 state_dict output file (e.g. path/checkpoint-12/pytorch_model.bin)",
    )
    parser.add_argument(
        "-t",
        "--tag",
        type=str,
        default=None,
        help="checkpoint tag used as a unique identifier for checkpoint. e.g., global_step1",
    )
    parser.add_argument("-d", "--debug", action='store_true', help="enable debug")
    args = parser.parse_args()

    # module-level assignment: switches on the debug output of the helpers above
    debug = args.debug

    convert_zero_checkpoint_to_fp32_state_dict(args.checkpoint_dir, args.output_file, tag=args.tag)