Training in progress, step 95, checkpoint

Files changed (4) hide show

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:62f8a18051375bb2300e3fa0539ea32cc58c069e699475d922439f4d3ec62dd6
 size 328468404

 version https://git-lfs.github.com/spec/v1
+oid sha256:b05e40b12bc26ffe48519e0c6ac0c4fa28c062dbd4322eae972a078f050e4cda
 size 328468404

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f926624316f3aacab8693acf0936fad8a6b5aa7abde866f6087f9c50c31b0ba7
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:7bf3d2ecd3db84661bfa7258cde34588f7279fae068e5a936704e115fd83ec9a
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7f6423aaf07b0a3e5bef1b21c59ae6d997dd59505ca758247471609a32b152cd
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:60ca561a785d3802440b426c58aafe0f1cf10dc4bab5c0b5dbec38821026a8aa
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.05101390128810101,
   "eval_steps": 55,
-  "global_step": 80,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -205,6 +205,41 @@
       "learning_rate": 3.12696703292044e-05,
       "loss": 0.0,
       "step": 78
     }
   ],
   "logging_steps": 3,
@@ -219,12 +254,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 3.768005016158208e+16,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.06057900777961995,
   "eval_steps": 55,
+  "global_step": 95,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 3.12696703292044e-05,
       "loss": 0.0,
       "step": 78
+    },
+    {
+      "epoch": 0.05165157505420227,
+      "grad_norm": NaN,
+      "learning_rate": 2.2040354826462668e-05,
+      "loss": 0.0,
+      "step": 81
+    },
+    {
+      "epoch": 0.05356459635250606,
+      "grad_norm": NaN,
+      "learning_rate": 1.4033009983067452e-05,
+      "loss": 0.314,
+      "step": 84
+    },
+    {
+      "epoch": 0.05547761765080984,
+      "grad_norm": NaN,
+      "learning_rate": 7.597595192178702e-06,
+      "loss": 0.0,
+      "step": 87
+    },
+    {
+      "epoch": 0.057390638949113636,
+      "grad_norm": NaN,
+      "learning_rate": 3.0153689607045845e-06,
+      "loss": 0.0,
+      "step": 90
+    },
+    {
+      "epoch": 0.05930366024741742,
+      "grad_norm": NaN,
+      "learning_rate": 4.865965629214819e-07,
+      "loss": 0.0,
+      "step": 93
     }
   ],
   "logging_steps": 3,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 4.470062554546176e+16,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null