Training in progress, step 185, checkpoint

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:54852292744f440802866f2a783ac23ba6ef16d422e18372b624c56a9993fd60
 size 97307544

 version https://git-lfs.github.com/spec/v1
+oid sha256:53f840c456e95ff795bb55a979f354b5ff45be3d04c09c6b95856e15c3fa87aa
 size 97307544

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2e585efd52a8e20e6ad89ed740f3d2f593db339c6c6144c06fc288ce92cceb4d
 size 49846260

 version https://git-lfs.github.com/spec/v1
+oid sha256:828ab7a81b68551a70d539cfd2cac13e0329b266849a1e49be1a7378953f427b
 size 49846260

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0be8ec8c2101cfccd393ed730b21787429a8999e0b138398d5c14f37c51656e9
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:8fb359c3a6e91a0edc2b5df10f0aa833ed71c37ab5b62579c99595688bae7c64
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7d26ba877a96beecd6515b5aee3300dc989b5588ecbd143a4bad8685b1c07c60
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:653ee3fc89860e50462e7548d430e695c40eaf8677cc1507132629ab8dddb6e3
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.23293432546101586,
   "eval_steps": 386,
-  "global_step": 180,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1275,6 +1275,41 @@
       "learning_rate": 9.971301406367644e-05,
       "loss": 0.8619,
       "step": 180
     }
   ],
   "logging_steps": 1,
@@ -1294,7 +1329,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 2.0122869090484224e+17,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.23940472339048852,
   "eval_steps": 386,
+  "global_step": 185,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 9.971301406367644e-05,
       "loss": 0.8619,
       "step": 180
+    },
+    {
+      "epoch": 0.2342284050469104,
+      "grad_norm": 0.6980477571487427,
+      "learning_rate": 9.970858622429579e-05,
+      "loss": 0.8271,
+      "step": 181
+    },
+    {
+      "epoch": 0.23552248463280492,
+      "grad_norm": 0.954387903213501,
+      "learning_rate": 9.970412458767943e-05,
+      "loss": 0.8465,
+      "step": 182
+    },
+    {
+      "epoch": 0.23681656421869945,
+      "grad_norm": 0.8425692915916443,
+      "learning_rate": 9.969962915686083e-05,
+      "loss": 0.8893,
+      "step": 183
+    },
+    {
+      "epoch": 0.23811064380459399,
+      "grad_norm": 0.8565071225166321,
+      "learning_rate": 9.969509993489647e-05,
+      "loss": 0.939,
+      "step": 184
+    },
+    {
+      "epoch": 0.23940472339048852,
+      "grad_norm": 0.8831691145896912,
+      "learning_rate": 9.969053692486583e-05,
+      "loss": 0.8907,
+      "step": 185
     }
   ],
   "logging_steps": 1,
       "attributes": {}
     }
   },
+  "total_flos": 2.0681837676331008e+17,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null