Training in progress, step 680, checkpoint

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6453d6251e0ebd3c93ae6ab461c741d23bdd3c9054437a07f234571e6bbd870d
 size 97307544

 version https://git-lfs.github.com/spec/v1
+oid sha256:2aa4cdb306bf7294222f7b3f970f532c7b4cdc22e6cc372d92a68a5832ae3ac9
 size 97307544

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9e59ea0d170b1d97152af9d4304a884d7a12b54d8018fa0f222189269b8d142e
 size 49846644

 version https://git-lfs.github.com/spec/v1
+oid sha256:3eb132123db17680d8840722840709fbc913fe87e08366059ecb3b185ef779ee
 size 49846644

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e27193596efc21909042ececa9090022df017f72ac0906ce611ddf2a46346d8f
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:477ac7793a58a9aee87e63f0ed0db383646c754c91fbf8cf6726681a3e1bf55c
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5f52f94e7e552f4e5062ad720e9a6e71644dc7d8752db34370b491b7d624535d
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:59c557a3a19fcbe03c6517f32635519665f582ddfe4481e7465a155c1f572f8b
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.8735037204788094,
   "eval_steps": 386,
-  "global_step": 675,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -4748,6 +4748,41 @@
       "learning_rate": 9.350593706906651e-05,
       "loss": 0.787,
       "step": 675
     }
   ],
   "logging_steps": 1,
@@ -4767,7 +4802,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 7.546075908931584e+17,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.8799741184082821,
   "eval_steps": 386,
+  "global_step": 680,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 9.350593706906651e-05,
       "loss": 0.787,
       "step": 675
+    },
+    {
+      "epoch": 0.874797800064704,
+      "grad_norm": 0.7540378570556641,
+      "learning_rate": 9.348560326234381e-05,
+      "loss": 0.8578,
+      "step": 676
+    },
+    {
+      "epoch": 0.8760918796505985,
+      "grad_norm": 0.7626041173934937,
+      "learning_rate": 9.346523988943758e-05,
+      "loss": 0.9294,
+      "step": 677
+    },
+    {
+      "epoch": 0.8773859592364931,
+      "grad_norm": 0.8360442519187927,
+      "learning_rate": 9.3444846964193e-05,
+      "loss": 0.8635,
+      "step": 678
+    },
+    {
+      "epoch": 0.8786800388223875,
+      "grad_norm": 0.9039386510848999,
+      "learning_rate": 9.342442450047537e-05,
+      "loss": 0.83,
+      "step": 679
+    },
+    {
+      "epoch": 0.8799741184082821,
+      "grad_norm": 0.7554466724395752,
+      "learning_rate": 9.340397251217009e-05,
+      "loss": 0.8103,
+      "step": 680
     }
   ],
   "logging_steps": 1,
       "attributes": {}
     }
   },
+  "total_flos": 7.601972767516262e+17,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null