Training in progress, step 850, checkpoint

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:92942064b68f22a635d4178bdbcdffb8eb8e323b4bc90a33365128f3cc01616b
 size 97307544

 version https://git-lfs.github.com/spec/v1
+oid sha256:8fd2eff90e09ff5cb32ab1ebd775cb0f037fd91c63a7deff49c1bf83cf6d32e7
 size 97307544

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8ae4855dfd0bb55befec84f2621a632e36dc7b481da908c36b47f26a478c29ab
 size 49846644

 version https://git-lfs.github.com/spec/v1
+oid sha256:ac70266b2eba094d3128dd6f864a3ede94de052e2e0aa23dba2f8f69ad1aa502
 size 49846644

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b44d761c2dc0b6e72b2c0f65252ea5160d07505b6189282d6a4254828b3a8986
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:d5259289c5a137780973c8367739829eb39d5d72183980f5655a6da8d42ad769
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7ca47c546dafdf46a438555e2c974efc5fd475da85887cb0544686113929f218
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:d74516f9adab70d9cfa3d092d8888b349f96cc406e15b22a9893c08784b3ae9b
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 1.09349725008088,
   "eval_steps": 386,
-  "global_step": 845,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -5946,6 +5946,41 @@
       "learning_rate": 8.963627323902104e-05,
       "loss": 0.6661,
       "step": 845
     }
   ],
   "logging_steps": 1,
@@ -5965,7 +6000,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 9.445870390078341e+17,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 1.0999676480103526,
   "eval_steps": 386,
+  "global_step": 850,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 8.963627323902104e-05,
       "loss": 0.6661,
       "step": 845
+    },
+    {
+      "epoch": 1.0947913296667746,
+      "grad_norm": 0.9900491833686829,
+      "learning_rate": 8.961112789936703e-05,
+      "loss": 0.874,
+      "step": 846
+    },
+    {
+      "epoch": 1.096085409252669,
+      "grad_norm": 1.002979040145874,
+      "learning_rate": 8.958595562781397e-05,
+      "loss": 0.6907,
+      "step": 847
+    },
+    {
+      "epoch": 1.0973794888385635,
+      "grad_norm": 0.9625217914581299,
+      "learning_rate": 8.956075644147664e-05,
+      "loss": 0.913,
+      "step": 848
+    },
+    {
+      "epoch": 1.0986735684244582,
+      "grad_norm": 1.0066606998443604,
+      "learning_rate": 8.953553035748817e-05,
+      "loss": 0.7295,
+      "step": 849
+    },
+    {
+      "epoch": 1.0999676480103526,
+      "grad_norm": 1.1030412912368774,
+      "learning_rate": 8.951027739299996e-05,
+      "loss": 0.792,
+      "step": 850
     }
   ],
   "logging_steps": 1,
       "attributes": {}
     }
   },
+  "total_flos": 9.50176724866302e+17,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null