Training in progress, step 645, checkpoint

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f8721ee43349bfc7e39016efefeb84f0798023653ef25fbfec79493feb5cad8e
 size 97307544

 version https://git-lfs.github.com/spec/v1
+oid sha256:d08b6d9b3a9d15d25aadb2cb24debe2d7cdca1e55bd48ea3702a43a94a2581af
 size 97307544

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c0b13d732b2a8dad952ee020e87f52fea1e5a59d34717eed4f8c6474e09d2124
 size 49846644

 version https://git-lfs.github.com/spec/v1
+oid sha256:b95e8afeb39ff0d445ff047308657bd1c41dd23bbe6bc9ee6297f75a79be822b
 size 49846644

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3037bb99ad816203fc49047110ff2f6ec00b478885f7281330c502f80f4d07f6
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:f4171439eadb13b2843ff5c47858b0150e7075e96e555aee2539438f15e25168
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:dc7648e34cd3e7e1b0e3d71e0e1fe805f3949228899d535acad963d0ca07c4a6
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:ea258fcfaa803fc3574884ee61cef84bd44233621e9e6a5cc1b7b02d59d4cca3
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.8282109349725008,
   "eval_steps": 386,
-  "global_step": 640,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -4503,6 +4503,41 @@
       "learning_rate": 9.419888751998767e-05,
       "loss": 0.7984,
       "step": 640
     }
   ],
   "logging_steps": 1,
@@ -4522,7 +4557,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 7.154797898838835e+17,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.8346813329019734,
   "eval_steps": 386,
+  "global_step": 645,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 9.419888751998767e-05,
       "loss": 0.7984,
       "step": 640
+    },
+    {
+      "epoch": 0.8295050145583953,
+      "grad_norm": 0.8296061754226685,
+      "learning_rate": 9.417959709536078e-05,
+      "loss": 0.8597,
+      "step": 641
+    },
+    {
+      "epoch": 0.8307990941442899,
+      "grad_norm": 0.8495222330093384,
+      "learning_rate": 9.416027663269881e-05,
+      "loss": 0.8396,
+      "step": 642
+    },
+    {
+      "epoch": 0.8320931737301844,
+      "grad_norm": 0.8501962423324585,
+      "learning_rate": 9.414092614513787e-05,
+      "loss": 0.9911,
+      "step": 643
+    },
+    {
+      "epoch": 0.833387253316079,
+      "grad_norm": 0.7546764612197876,
+      "learning_rate": 9.412154564583448e-05,
+      "loss": 0.9043,
+      "step": 644
+    },
+    {
+      "epoch": 0.8346813329019734,
+      "grad_norm": 0.8009942770004272,
+      "learning_rate": 9.410213514796564e-05,
+      "loss": 0.8242,
+      "step": 645
     }
   ],
   "logging_steps": 1,
       "attributes": {}
     }
   },
+  "total_flos": 7.210694757423514e+17,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null