Training in progress, step 175, checkpoint

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4942b6183f603313ddbbead0c35bcd0516a7d27c74d2166204d4f4ba26f31290
 size 97307544

 version https://git-lfs.github.com/spec/v1
+oid sha256:7c4368dad332de79ce5e1776ab789ac709273e1d5cfde0537da85b4f8710a393
 size 97307544

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e8e1d461799f7506f1a0debcac6a984a4a3c2a3599cdd0d68dc31e8b53af334e
 size 49846260

 version https://git-lfs.github.com/spec/v1
+oid sha256:c8facfa6be8cc3dc3ebce039ce013fd72af97d5a0f36cf2d27f0da5791f1e5fd
 size 49846260

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:db5d1d56ddf5f29362595a5f01521cef39fc7e9347ff871a7770a4e89003ccaa
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:1b64f69b613fca6d8f40873768b1557e9b59b999a2059a3ece3e3ec028766d3f
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:255044db1acc114cb0f2f88c44fef16238de1948a1b84ffe68ecd9e99fb61dda
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:61a3e47e838c8bbbc792ba3c58434b0fe8b77cb810386b07d18a0158d41dc378
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.21999352960207053,
   "eval_steps": 386,
-  "global_step": 170,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1205,6 +1205,41 @@
       "learning_rate": 9.975543295858035e-05,
       "loss": 0.8836,
       "step": 170
     }
   ],
   "logging_steps": 1,
@@ -1224,7 +1259,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 1.9004931918790656e+17,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.22646392753154318,
   "eval_steps": 386,
+  "global_step": 175,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 9.975543295858035e-05,
       "loss": 0.8836,
       "step": 170
+    },
+    {
+      "epoch": 0.22128760918796506,
+      "grad_norm": 0.8391397595405579,
+      "learning_rate": 9.97513432533431e-05,
+      "loss": 0.9003,
+      "step": 171
+    },
+    {
+      "epoch": 0.2225816887738596,
+      "grad_norm": 0.9666821360588074,
+      "learning_rate": 9.974721972179931e-05,
+      "loss": 0.9528,
+      "step": 172
+    },
+    {
+      "epoch": 0.22387576835975412,
+      "grad_norm": 0.9321691393852234,
+      "learning_rate": 9.974306236675259e-05,
+      "loss": 0.9575,
+      "step": 173
+    },
+    {
+      "epoch": 0.22516984794564865,
+      "grad_norm": 0.8022271990776062,
+      "learning_rate": 9.973887119102957e-05,
+      "loss": 0.8731,
+      "step": 174
+    },
+    {
+      "epoch": 0.22646392753154318,
+      "grad_norm": 1.1056872606277466,
+      "learning_rate": 9.973464619747983e-05,
+      "loss": 0.9925,
+      "step": 175
     }
   ],
   "logging_steps": 1,
       "attributes": {}
     }
   },
+  "total_flos": 1.956390050463744e+17,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null