Training in progress, step 585, checkpoint

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ddd8d5d17a70fc79518195b5b2e2d18e0edf35451d2de3c1d4af60412a1a30e8
 size 97307544

 version https://git-lfs.github.com/spec/v1
+oid sha256:86e81dd5a890a7956466a556372acdea40e7874dd36d48313477dc752b4cb1d9
 size 97307544

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1105fd8323adc5ab31859ebf0c1f559487e30aa157c674ed5d23d9852296ed98
 size 49846644

 version https://git-lfs.github.com/spec/v1
+oid sha256:613659f29181eb8b432f0d2e9dfcae177dee2e33e9738d77d19452cd77b18bf8
 size 49846644

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:48e8d93d81cb229c0956f2142d68e1f8aa0357ca11d56ce040bb137cf176e02f
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:1d53b54b216d5077c52ca37e8c9f0b1685f0a5a9e8331d755086a0f111dd328a
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:49970088ac3a5ad000c30d8106322041a0265ebced6d734c6afdb98d800f2095
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:c7c9085847647e6cd13695f1ec4ce9d144b2ab365d6811882265bef6d20ea83e
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.7505661598188289,
   "eval_steps": 386,
-  "global_step": 580,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -4083,6 +4083,41 @@
       "learning_rate": 9.530085881215705e-05,
       "loss": 0.8092,
       "step": 580
     }
   ],
   "logging_steps": 1,
@@ -4102,7 +4137,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 6.484035595822694e+17,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.7570365577483015,
   "eval_steps": 386,
+  "global_step": 585,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 9.530085881215705e-05,
       "loss": 0.8092,
       "step": 580
+    },
+    {
+      "epoch": 0.7518602394047234,
+      "grad_norm": 0.8892885446548462,
+      "learning_rate": 9.528339393146033e-05,
+      "loss": 0.8422,
+      "step": 581
+    },
+    {
+      "epoch": 0.7531543189906179,
+      "grad_norm": 0.7766391634941101,
+      "learning_rate": 9.526589826224887e-05,
+      "loss": 0.9596,
+      "step": 582
+    },
+    {
+      "epoch": 0.7544483985765125,
+      "grad_norm": 0.8290911316871643,
+      "learning_rate": 9.524837181641813e-05,
+      "loss": 0.9624,
+      "step": 583
+    },
+    {
+      "epoch": 0.755742478162407,
+      "grad_norm": 0.7832044363021851,
+      "learning_rate": 9.523081460588444e-05,
+      "loss": 0.8141,
+      "step": 584
+    },
+    {
+      "epoch": 0.7570365577483015,
+      "grad_norm": 0.9279314875602722,
+      "learning_rate": 9.521322664258508e-05,
+      "loss": 0.8869,
+      "step": 585
     }
   ],
   "logging_steps": 1,
       "attributes": {}
     }
   },
+  "total_flos": 6.539932454407373e+17,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null