Training in progress, step 30, checkpoint

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8e681c7412ef2557f51f437204f7dbbccb418dccefea52569f201f7a05c61feb
 size 97307544

 version https://git-lfs.github.com/spec/v1
+oid sha256:e7c25e36ef1bea88e955b04a392995659ce2efa2958e9a824de856926f2f78b2
 size 97307544

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:196d52d70a6910ff8c67869e7cbbe76c44d57cc489019ff6d2baf29ba21ca21c
 size 49846260

 version https://git-lfs.github.com/spec/v1
+oid sha256:cc69a4cb3de67ebaa3a95161b888f3e6a62143841950ec7d93681c428ce896bf
 size 49846260

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6bec6c83fbb7d43296cc5ab0e300576282a47308ba5787731efc1f099f27e291
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:5e8303210a87e6366e53f9d2ad1dc5984114aa017ddbff7d118553d8efe51202
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:814b79b47e12bde76b22a1ac4fd2f1e7ddf84c332d0b19c3538a1fad1c6cc96e
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:4a73e1ff9beffc13aa54f4adf4df9ed4ad8819cc503c53ddfd100ef74e91d520
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.03235198964736331,
   "eval_steps": 386,
-  "global_step": 25,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -190,6 +190,41 @@
       "learning_rate": 5e-05,
       "loss": 1.4606,
       "step": 25
     }
   ],
   "logging_steps": 1,
@@ -209,7 +244,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 2.79484292923392e+16,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.038822387576835975,
   "eval_steps": 386,
+  "global_step": 30,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 5e-05,
       "loss": 1.4606,
       "step": 25
+    },
+    {
+      "epoch": 0.033646069233257844,
+      "grad_norm": 1.5480064153671265,
+      "learning_rate": 5.2000000000000004e-05,
+      "loss": 1.5027,
+      "step": 26
+    },
+    {
+      "epoch": 0.034940148819152375,
+      "grad_norm": 1.6736445426940918,
+      "learning_rate": 5.4000000000000005e-05,
+      "loss": 1.2426,
+      "step": 27
+    },
+    {
+      "epoch": 0.03623422840504691,
+      "grad_norm": 1.7392551898956299,
+      "learning_rate": 5.6000000000000006e-05,
+      "loss": 1.4703,
+      "step": 28
+    },
+    {
+      "epoch": 0.037528307990941444,
+      "grad_norm": 1.6173359155654907,
+      "learning_rate": 5.8e-05,
+      "loss": 1.4546,
+      "step": 29
+    },
+    {
+      "epoch": 0.038822387576835975,
+      "grad_norm": 1.3955802917480469,
+      "learning_rate": 6e-05,
+      "loss": 1.3808,
+      "step": 30
     }
   ],
   "logging_steps": 1,
       "attributes": {}
     }
   },
+  "total_flos": 3.353811515080704e+16,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null