Training in progress, step 605, checkpoint

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2a11def1b8890ff338c8c007d664175329cb9dcda5b586da88c0224efb7c1162
 size 97307544

 version https://git-lfs.github.com/spec/v1
+oid sha256:7b390cff10ccb0462e2ce56478461d8e6338b504fc23e78bc158e0819d8dc9de
 size 97307544

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:bdbd37404bc8a5fa123ffac8185bfbd34fb1ed364fd02edc69624ad20d060c6c
 size 49846644

 version https://git-lfs.github.com/spec/v1
+oid sha256:286cc2bd89d71803d1810c6df9f8f1ffd3acec5c70032db2dcefd9830c184d3f
 size 49846644

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:95ccc841f095274b3bf1305720cff27c6b752d6dc6d8c301879306023e700650
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:a876f1e76952adad9028628bf8945815972da7c35421764155ece90dc00dd324
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5211010f14f9c1fb779ee2261873fc677d85f915b6ea93780c3196c674072288
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:0544bca5420fca1dbd853da9c587baf2b7fb350e9011928f11101b6ff88e6e56
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.7764477515367195,
   "eval_steps": 386,
-  "global_step": 600,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -4223,6 +4223,41 @@
       "learning_rate": 9.494572504247593e-05,
       "loss": 0.9044,
       "step": 600
     }
   ],
   "logging_steps": 1,
@@ -4242,7 +4277,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 6.707623030161408e+17,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.7829181494661922,
   "eval_steps": 386,
+  "global_step": 605,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 9.494572504247593e-05,
       "loss": 0.9044,
       "step": 600
+    },
+    {
+      "epoch": 0.7777418311226141,
+      "grad_norm": 0.8177486062049866,
+      "learning_rate": 9.492764667543252e-05,
+      "loss": 0.8132,
+      "step": 601
+    },
+    {
+      "epoch": 0.7790359107085085,
+      "grad_norm": 1.0061938762664795,
+      "learning_rate": 9.490953776174955e-05,
+      "loss": 0.9204,
+      "step": 602
+    },
+    {
+      "epoch": 0.7803299902944031,
+      "grad_norm": 0.8224837779998779,
+      "learning_rate": 9.489139831373944e-05,
+      "loss": 0.7909,
+      "step": 603
+    },
+    {
+      "epoch": 0.7816240698802976,
+      "grad_norm": 0.7812953591346741,
+      "learning_rate": 9.48732283437353e-05,
+      "loss": 0.9583,
+      "step": 604
+    },
+    {
+      "epoch": 0.7829181494661922,
+      "grad_norm": 0.8498075604438782,
+      "learning_rate": 9.485502786409107e-05,
+      "loss": 1.0692,
+      "step": 605
     }
   ],
   "logging_steps": 1,
       "attributes": {}
     }
   },
+  "total_flos": 6.763519888746086e+17,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null