Training in progress, step 420, checkpoint

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f3c853d3c99ad98cd0f1a3885e822275c0b7c1209b0bb534194841e45f77ccde
 size 97307544

 version https://git-lfs.github.com/spec/v1
+oid sha256:2bda3973802d4dbe7e1febacfe2285c1ad74a69704c82dcd78446562f9a1926d
 size 97307544

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:674fe507fc4380e8cfbc66f2ade552e239f7263730b76fe4741c197838f048dd
 size 49846644

 version https://git-lfs.github.com/spec/v1
+oid sha256:946ca5a242b732a08489e3c9f5009fccf9316aa3c64052f3c123a7cbf199b27c
 size 49846644

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:693ea3a43eaa5cd09102e82652754c22734bed378c16600849bbc7fa038d670b
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:594ae8f292448e23ba6c3e26a64c1c1ada41d262255da9a2d6d6797b9bd3a554
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5b210180cd4578e203d13e7831d41ab50e4f3ba110e1252d9dac7fb7d3a0c022
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:ccc44ae533ab5cb598c5cab350c6c0e00c24c2118f4dfe0f5a4518a727f56b8d
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.537043028146231,
   "eval_steps": 386,
-  "global_step": 415,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -2928,6 +2928,41 @@
       "learning_rate": 9.77525252054893e-05,
       "loss": 0.6972,
       "step": 415
     }
   ],
   "logging_steps": 1,
@@ -2947,7 +2982,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 4.639439262528307e+17,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.5435134260757036,
   "eval_steps": 386,
+  "global_step": 420,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 9.77525252054893e-05,
       "loss": 0.6972,
       "step": 415
+    },
+    {
+      "epoch": 0.5383371077321255,
+      "grad_norm": 1.0071479082107544,
+      "learning_rate": 9.774028714137133e-05,
+      "loss": 0.9233,
+      "step": 416
+    },
+    {
+      "epoch": 0.5396311873180201,
+      "grad_norm": 0.916771411895752,
+      "learning_rate": 9.772801661827874e-05,
+      "loss": 1.0072,
+      "step": 417
+    },
+    {
+      "epoch": 0.5409252669039146,
+      "grad_norm": 0.8663278818130493,
+      "learning_rate": 9.771571364455439e-05,
+      "loss": 1.1011,
+      "step": 418
+    },
+    {
+      "epoch": 0.5422193464898091,
+      "grad_norm": 0.7842355370521545,
+      "learning_rate": 9.77033782285631e-05,
+      "loss": 1.0382,
+      "step": 419
+    },
+    {
+      "epoch": 0.5435134260757036,
+      "grad_norm": 0.8407487273216248,
+      "learning_rate": 9.769101037869187e-05,
+      "loss": 0.9612,
+      "step": 420
     }
   ],
   "logging_steps": 1,
       "attributes": {}
     }
   },
+  "total_flos": 4.6953361211129856e+17,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null