Training in progress, step 40, checkpoint

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8a0904a44c1929f97e8bd4e5c46a96a8a3044de6f935bcf5630acdb0cdb6d739
 size 97307544

 version https://git-lfs.github.com/spec/v1
+oid sha256:753c7f048d3c17a8dff05f7e48fa6cf023fc614649bb13ef4db1a60157f75a09
 size 97307544

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2ef6f282d85d8e63c53b00534e682ea5282bac1edb49ab13b1b78e144354038b
 size 49846260

 version https://git-lfs.github.com/spec/v1
+oid sha256:9012b11cde3a2f0c54dbc6f706596bd3a5f30e4c957cf2683a03dee748e9f767
 size 49846260

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e24fd0a4708a45e9b32be5aab9d4589ac1e498dcf2ba55e9e776e2a6e66e9b62
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:e6c5ab014d4aef79b98314290c49544dc6945723046a24993974bca5a70c4fbb
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2a97edf5ef9280d040e46685cc4e47c24383c42a51949ff834379ab1766a8b0a
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:7b1e5be422da84e599b3b273b39d45d7ca20a6f0b0460857ee0c0fe6a229a2c9
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.045292785506308636,
   "eval_steps": 386,
-  "global_step": 35,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -260,6 +260,41 @@
       "learning_rate": 7e-05,
       "loss": 0.9668,
       "step": 35
     }
   ],
   "logging_steps": 1,
@@ -279,7 +314,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 3.912780100927488e+16,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.0517631834357813,
   "eval_steps": 386,
+  "global_step": 40,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 7e-05,
       "loss": 0.9668,
       "step": 35
+    },
+    {
+      "epoch": 0.046586865092203174,
+      "grad_norm": 1.5457032918930054,
+      "learning_rate": 7.2e-05,
+      "loss": 1.1385,
+      "step": 36
+    },
+    {
+      "epoch": 0.047880944678097705,
+      "grad_norm": 1.5587060451507568,
+      "learning_rate": 7.4e-05,
+      "loss": 1.1707,
+      "step": 37
+    },
+    {
+      "epoch": 0.049175024263992236,
+      "grad_norm": 1.079053282737732,
+      "learning_rate": 7.6e-05,
+      "loss": 1.0655,
+      "step": 38
+    },
+    {
+      "epoch": 0.050469103849886766,
+      "grad_norm": 1.1773897409439087,
+      "learning_rate": 7.800000000000001e-05,
+      "loss": 1.0465,
+      "step": 39
+    },
+    {
+      "epoch": 0.0517631834357813,
+      "grad_norm": 1.2437673807144165,
+      "learning_rate": 8e-05,
+      "loss": 1.2779,
+      "step": 40
     }
   ],
   "logging_steps": 1,
       "attributes": {}
     }
   },
+  "total_flos": 4.471748686774272e+16,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null