Training in progress, step 225, checkpoint

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:205d6e49f052c68e31e4b4d48cfda1756d56bdd8b8f793f6885d29be396c52c7
 size 97307544

 version https://git-lfs.github.com/spec/v1
+oid sha256:18fdbc0c4f0a856bb851756d41e848c7f8da8cdd3c8bb06d2d6bed157cd6e47f
 size 97307544

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ce6b050b1c5edf6aec30582163726e1425f3b8a84ebc8293985d0a47f0b28b43
 size 49846260

 version https://git-lfs.github.com/spec/v1
+oid sha256:593b4b5069ea3e92bd5dd2bd90ebd76f7ec8bccb3efe79a2301c144ab29a07b4
 size 49846260

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:bc86b00df74af7c3ae7c1114b70673a81bf15a56d4e6f0edaf89c595f91d6339
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:74a5cf5e988307b7bc5ac99195c5626f83509282efb2453403481b5a9c2074a3
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:67b93879c4e6775563c05d16d872585eaef843648c94363ef6e0e8ca9fabb503
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:70b35dc00d38fbc94393b01bb750de67a9e73be3bb058ea334b16afbeab55729
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.2846975088967972,
   "eval_steps": 386,
-  "global_step": 220,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1555,6 +1555,41 @@
       "learning_rate": 9.950957071399357e-05,
       "loss": 0.8541,
       "step": 220
     }
   ],
   "logging_steps": 1,
@@ -1574,7 +1609,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 2.4594617777258496e+17,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.2911679068262698,
   "eval_steps": 386,
+  "global_step": 225,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 9.950957071399357e-05,
       "loss": 0.8541,
       "step": 220
+    },
+    {
+      "epoch": 0.2859915884826917,
+      "grad_norm": 0.8993477821350098,
+      "learning_rate": 9.950379357812543e-05,
+      "loss": 1.0253,
+      "step": 221
+    },
+    {
+      "epoch": 0.28728566806858624,
+      "grad_norm": 1.073880910873413,
+      "learning_rate": 9.949798278426158e-05,
+      "loss": 1.115,
+      "step": 222
+    },
+    {
+      "epoch": 0.28857974765448074,
+      "grad_norm": 0.7941976189613342,
+      "learning_rate": 9.949213833635285e-05,
+      "loss": 0.9398,
+      "step": 223
+    },
+    {
+      "epoch": 0.2898738272403753,
+      "grad_norm": 0.798089325428009,
+      "learning_rate": 9.948626023837291e-05,
+      "loss": 0.8523,
+      "step": 224
+    },
+    {
+      "epoch": 0.2911679068262698,
+      "grad_norm": 1.0251280069351196,
+      "learning_rate": 9.948034849431831e-05,
+      "loss": 0.939,
+      "step": 225
     }
   ],
   "logging_steps": 1,
       "attributes": {}
     }
   },
+  "total_flos": 2.515358636310528e+17,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null