Training in progress, step 335, checkpoint

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b274b23b17bca2b14ae2d9fb45fd524420d050cf491dd85b4f172f6c7a0696bc
 size 97307544

 version https://git-lfs.github.com/spec/v1
+oid sha256:d5a88ed30b979e8b79a0b8e0587def94ba339e1948a5407035566744f99c4d5d
 size 97307544

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:df10525457092fcfa29efef6dcd446f702708d75f5e1878045e0a47131e0987f
 size 49846644

 version https://git-lfs.github.com/spec/v1
+oid sha256:0fb839e71f2559609681b6b808413e24fd0cd168e4b4c062feff2eeb23eeb240
 size 49846644

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:07bb2c506278a8c812ee9f9f106c4eb66fe3fe1b7cd81f4089b25cec2eac7823
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:07fa940b7237013ec0cf4dbb080091b3cbff44f1b6dbb14ecfef39a6acf258de
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2e09b704f8200c0a4b54bab97b9a94f210e99c85e6d931d311c15309f3a845f1
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:dee82665dfa8e81a745327347013c931be0b2da410ed681c21a036a30f1549b1
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.42704626334519574,
   "eval_steps": 386,
-  "global_step": 330,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -2325,6 +2325,41 @@
       "learning_rate": 9.867329048233387e-05,
       "loss": 0.7913,
       "step": 330
     }
   ],
   "logging_steps": 1,
@@ -2344,7 +2379,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 3.6891926665887744e+17,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.43351666127466837,
   "eval_steps": 386,
+  "global_step": 335,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 9.867329048233387e-05,
       "loss": 0.7913,
       "step": 330
+    },
+    {
+      "epoch": 0.42834034293109025,
+      "grad_norm": 0.8913648128509521,
+      "learning_rate": 9.866383957281309e-05,
+      "loss": 0.7885,
+      "step": 331
+    },
+    {
+      "epoch": 0.4296344225169848,
+      "grad_norm": 0.9160618782043457,
+      "learning_rate": 9.865435557638757e-05,
+      "loss": 0.7296,
+      "step": 332
+    },
+    {
+      "epoch": 0.4309285021028793,
+      "grad_norm": 0.7498170137405396,
+      "learning_rate": 9.864483849950553e-05,
+      "loss": 0.9655,
+      "step": 333
+    },
+    {
+      "epoch": 0.43222258168877387,
+      "grad_norm": 0.7315449714660645,
+      "learning_rate": 9.863528834863773e-05,
+      "loss": 0.6886,
+      "step": 334
+    },
+    {
+      "epoch": 0.43351666127466837,
+      "grad_norm": 0.8278869390487671,
+      "learning_rate": 9.862570513027735e-05,
+      "loss": 0.9637,
+      "step": 335
     }
   ],
   "logging_steps": 1,
       "attributes": {}
     }
   },
+  "total_flos": 3.745089525173453e+17,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null