Training in progress, step 815, checkpoint

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:bf452801c6be48cc1537a18941687b7b4700636962076dcf84dab95f9c490c5f
 size 97307544

 version https://git-lfs.github.com/spec/v1
+oid sha256:b351ea48a340e8d0aefe39d72e8c7c096d1970d8822472c4b4f9a248e83c5aa9
 size 97307544

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2e173d01920f0c4adb5acdafd91f3ef92c299c24fcad8b3c449520428b5d5f98
 size 49846644

 version https://git-lfs.github.com/spec/v1
+oid sha256:4f2e4b9fb46f9ee260fad8a5f6d28cc1e2028cf112c6ecd0f9b828a991c4b30b
 size 49846644

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:48357d05ceaf70ef750e9850734136084281b884901dd132b3d7307fc67b7534
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:e9fafc5ba32d8ff5b04e05a3ca43c3317ebaffa1127c0eaaaca972e5a128721f
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8ae1b957ea3b5be016b1408fb97083cf05faa199719b2f6f2412013b84e7b582
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:2204a2f67a3dbafc8915907bcdcd66b239f7be2c081f646d7a24f64134e63ee4
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 1.0482044645745714,
   "eval_steps": 386,
-  "global_step": 810,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -5701,6 +5701,41 @@
       "learning_rate": 9.049926140583487e-05,
       "loss": 0.7436,
       "step": 810
     }
   ],
   "logging_steps": 1,
@@ -5720,7 +5755,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 9.054592379985592e+17,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 1.054674862504044,
   "eval_steps": 386,
+  "global_step": 815,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 9.049926140583487e-05,
       "loss": 0.7436,
       "step": 810
+    },
+    {
+      "epoch": 1.0494985441604658,
+      "grad_norm": 0.8351684808731079,
+      "learning_rate": 9.047506932194074e-05,
+      "loss": 0.8892,
+      "step": 811
+    },
+    {
+      "epoch": 1.0507926237463603,
+      "grad_norm": 0.8384698033332825,
+      "learning_rate": 9.045084971874738e-05,
+      "loss": 0.818,
+      "step": 812
+    },
+    {
+      "epoch": 1.052086703332255,
+      "grad_norm": 0.9196306467056274,
+      "learning_rate": 9.042660261272185e-05,
+      "loss": 0.734,
+      "step": 813
+    },
+    {
+      "epoch": 1.0533807829181494,
+      "grad_norm": 0.9839885234832764,
+      "learning_rate": 9.040232802034998e-05,
+      "loss": 0.7974,
+      "step": 814
+    },
+    {
+      "epoch": 1.054674862504044,
+      "grad_norm": 0.9852064251899719,
+      "learning_rate": 9.03780259581362e-05,
+      "loss": 0.8203,
+      "step": 815
     }
   ],
   "logging_steps": 1,
       "attributes": {}
     }
   },
+  "total_flos": 9.110489238570271e+17,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null