Training in progress, step 425, checkpoint

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2bda3973802d4dbe7e1febacfe2285c1ad74a69704c82dcd78446562f9a1926d
 size 97307544

 version https://git-lfs.github.com/spec/v1
+oid sha256:8ebdb29d9486a130aa0643f46ee0a0c62804d066e5e565482737346091c91a4f
 size 97307544

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:946ca5a242b732a08489e3c9f5009fccf9316aa3c64052f3c123a7cbf199b27c
 size 49846644

 version https://git-lfs.github.com/spec/v1
+oid sha256:b2e999ae9fca5af619ba5b1c0aa8c6c5128c6255b22b10d7fa7e41d1f7ed2f89
 size 49846644

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:594ae8f292448e23ba6c3e26a64c1c1ada41d262255da9a2d6d6797b9bd3a554
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:e1f4bee02a2423f6bf461638ccfb451ce039046573115c43bd8f40b32464c3ba
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ccc44ae533ab5cb598c5cab350c6c0e00c24c2118f4dfe0f5a4518a727f56b8d
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:af5756a3003f956101e6e090da18ea2a94d3b04f61ed1d4e64424da2405b4032
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.5435134260757036,
   "eval_steps": 386,
-  "global_step": 420,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -2963,6 +2963,41 @@
       "learning_rate": 9.769101037869187e-05,
       "loss": 0.9612,
       "step": 420
     }
   ],
   "logging_steps": 1,
@@ -2982,7 +3017,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 4.6953361211129856e+17,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.5499838240051763,
   "eval_steps": 386,
+  "global_step": 425,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 9.769101037869187e-05,
       "loss": 0.9612,
       "step": 420
+    },
+    {
+      "epoch": 0.5448075056615982,
+      "grad_norm": 0.7557950615882874,
+      "learning_rate": 9.767861010334962e-05,
+      "loss": 0.9965,
+      "step": 421
+    },
+    {
+      "epoch": 0.5461015852474927,
+      "grad_norm": 0.812099814414978,
+      "learning_rate": 9.766617741096746e-05,
+      "loss": 0.8824,
+      "step": 422
+    },
+    {
+      "epoch": 0.5473956648333873,
+      "grad_norm": 0.8438544273376465,
+      "learning_rate": 9.765371230999843e-05,
+      "loss": 0.8852,
+      "step": 423
+    },
+    {
+      "epoch": 0.5486897444192818,
+      "grad_norm": 0.7006101608276367,
+      "learning_rate": 9.764121480891765e-05,
+      "loss": 0.808,
+      "step": 424
+    },
+    {
+      "epoch": 0.5499838240051763,
+      "grad_norm": 0.7312132716178894,
+      "learning_rate": 9.76286849162223e-05,
+      "loss": 0.7964,
+      "step": 425
     }
   ],
   "logging_steps": 1,
       "attributes": {}
     }
   },
+  "total_flos": 4.751232979697664e+17,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null