Training in progress, step 385, checkpoint

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:158ccb26f9051a1ce9a47cd393a551e5dce6c352732882a27c1b88d8d4e2f682
 size 97307544

 version https://git-lfs.github.com/spec/v1
+oid sha256:4ad5bb6a07b5358dccb40a92e32494631d5f1c697661e9c9628e3e1b798cad32
 size 97307544

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:fa40f3c08f8aa8762162a52339fec13192340a007a872adbd2a0699f5af2f111
 size 49846644

 version https://git-lfs.github.com/spec/v1
+oid sha256:c4f1b6dde27d8af2c96a8d1851af11d6c5644f5a3c144dfa41d9597eeffc1f83
 size 49846644

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:37239226899f264c9f793826b646f1103d28fdf8250bd2a9877ca22fe4056168
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:bcc07ce19b7fd4f90004a8e11f50490415cba07804798230ae98a4519365742a
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:39411f8be444761c5218c2d62e475489e6eb0154d2c02b4d166b577f908343d6
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:51fbcb00d7a67643a5230f4ba48fcd85e7a8215e6d43c6acb6e42d34667255bc
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.49175024263992234,
   "eval_steps": 386,
-  "global_step": 380,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -2675,6 +2675,41 @@
       "learning_rate": 9.816034510373286e-05,
       "loss": 0.9889,
       "step": 380
     }
   ],
   "logging_steps": 1,
@@ -2694,7 +2729,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 4.2481612524355584e+17,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.498220640569395,
   "eval_steps": 386,
+  "global_step": 385,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 9.816034510373286e-05,
       "loss": 0.9889,
       "step": 380
+    },
+    {
+      "epoch": 0.4930443222258169,
+      "grad_norm": 0.829309344291687,
+      "learning_rate": 9.814924818785865e-05,
+      "loss": 0.8139,
+      "step": 381
+    },
+    {
+      "epoch": 0.4943384018117114,
+      "grad_norm": 0.83943110704422,
+      "learning_rate": 9.81381185349542e-05,
+      "loss": 0.9202,
+      "step": 382
+    },
+    {
+      "epoch": 0.49563248139760596,
+      "grad_norm": 0.7981933355331421,
+      "learning_rate": 9.812695615258662e-05,
+      "loss": 0.9131,
+      "step": 383
+    },
+    {
+      "epoch": 0.49692656098350046,
+      "grad_norm": 0.7930905818939209,
+      "learning_rate": 9.81157610483453e-05,
+      "loss": 0.769,
+      "step": 384
+    },
+    {
+      "epoch": 0.498220640569395,
+      "grad_norm": 0.8699679970741272,
+      "learning_rate": 9.81045332298419e-05,
+      "loss": 0.9468,
+      "step": 385
     }
   ],
   "logging_steps": 1,
       "attributes": {}
     }
   },
+  "total_flos": 4.304058111020237e+17,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null