Training in progress, step 820, checkpoint

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:33bf95807022ad8256390457a8922e5eb79283db0cb74644d292a992d88bb148
 size 45118424

 version https://git-lfs.github.com/spec/v1
+oid sha256:970c1e5f20a9c6313ae0d772b0f16498af7abe36336fc26af3744ed91839b9b6
 size 45118424

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e961e61ae628fdef1c80ab9e62b59805293d403bafa5fdbd9c16e141e8f3833d
 size 23159546

 version https://git-lfs.github.com/spec/v1
+oid sha256:80aee59a609b1f37dbe1d027f3ef1467c75b3d54433eeeaed8474b61eeae1e2d
 size 23159546

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c316a89e564105b6a4d3697edf95b71fd92b58e6f0210008c23c402d785036ca
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:79c56ec642f4caf6c132646e9783305c87d1a4731e3978fcc2bd0ea8585ac231
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:406944ca466952137e0b114cdd90cd03d0c938cd95631d05a6add36515fca9c8
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:c717f38e6747d9b86924e9284e7ce198f0fb7dc628bd7877fbe61fddd3883692
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 4.201030927835052,
   "eval_steps": 97,
-  "global_step": 815,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -5784,6 +5784,41 @@
       "learning_rate": 6.8417125980128675e-06,
       "loss": 0.6238,
       "step": 815
     }
   ],
   "logging_steps": 1,
@@ -5803,7 +5838,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 3.151872409070469e+17,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 4.22680412371134,
   "eval_steps": 97,
+  "global_step": 820,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 6.8417125980128675e-06,
       "loss": 0.6238,
       "step": 815
+    },
+    {
+      "epoch": 4.206185567010309,
+      "grad_norm": 0.7783701419830322,
+      "learning_rate": 6.755754876962711e-06,
+      "loss": 0.5216,
+      "step": 816
+    },
+    {
+      "epoch": 4.211340206185567,
+      "grad_norm": 0.6552199125289917,
+      "learning_rate": 6.670301412974511e-06,
+      "loss": 0.4832,
+      "step": 817
+    },
+    {
+      "epoch": 4.216494845360825,
+      "grad_norm": 0.8719028830528259,
+      "learning_rate": 6.585353202493322e-06,
+      "loss": 0.575,
+      "step": 818
+    },
+    {
+      "epoch": 4.221649484536083,
+      "grad_norm": 0.7946240305900574,
+      "learning_rate": 6.500911236072532e-06,
+      "loss": 0.648,
+      "step": 819
+    },
+    {
+      "epoch": 4.22680412371134,
+      "grad_norm": 0.7797208428382874,
+      "learning_rate": 6.416976498362432e-06,
+      "loss": 0.4849,
+      "step": 820
     }
   ],
   "logging_steps": 1,
       "attributes": {}
     }
   },
+  "total_flos": 3.171226860461752e+17,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null