Training in progress, step 325, checkpoint

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6953ae7f6115d570fb8b6044270a4fe2e65715f11390da71b6c6571b692b38fc
 size 97307544

 version https://git-lfs.github.com/spec/v1
+oid sha256:7ea56450fdd0f062e2582b0d9342bc249b9178a0f643c7c9864b7972919084f4
 size 97307544

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2125e4e945b6170832b2f6b047046dd51779e474a701b162d37c31462bade964
 size 49846644

 version https://git-lfs.github.com/spec/v1
+oid sha256:a4fa14455bc2fa646a0d1356cf7b7978478e663d2abc89b631beb057c254467b
 size 49846644

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6d60ac4f3dfeb480aa47e46e780d6d5afc472bfe9f3793df96d8ba938b9c9522
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:7af9f543e1b227e80431d9b71735f56ac10caba331db4119655a613e1f923579
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:259f2542494ed0450cbd0aba881cda09f149af3f98edcffaf21912036f30e17f
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:5ebcc6744bfae863fe4396052394cc81a120b788a3be0b48befca2ef92651996
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.4141054674862504,
   "eval_steps": 386,
-  "global_step": 320,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -2255,6 +2255,41 @@
       "learning_rate": 9.876597839525814e-05,
       "loss": 1.1169,
       "step": 320
     }
   ],
   "logging_steps": 1,
@@ -2274,7 +2309,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 3.5773989494194176e+17,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.42057586541572306,
   "eval_steps": 386,
+  "global_step": 325,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 9.876597839525814e-05,
       "loss": 1.1169,
       "step": 320
+    },
+    {
+      "epoch": 0.41539954707214494,
+      "grad_norm": 0.7923420667648315,
+      "learning_rate": 9.875685870448672e-05,
+      "loss": 0.9942,
+      "step": 321
+    },
+    {
+      "epoch": 0.4166936266580395,
+      "grad_norm": 0.7265552282333374,
+      "learning_rate": 9.874770586356616e-05,
+      "loss": 1.0377,
+      "step": 322
+    },
+    {
+      "epoch": 0.417987706243934,
+      "grad_norm": 0.7586270570755005,
+      "learning_rate": 9.873851987871954e-05,
+      "loss": 0.9172,
+      "step": 323
+    },
+    {
+      "epoch": 0.41928178582982856,
+      "grad_norm": 0.782192587852478,
+      "learning_rate": 9.872930075619249e-05,
+      "loss": 0.9219,
+      "step": 324
+    },
+    {
+      "epoch": 0.42057586541572306,
+      "grad_norm": 0.8508116602897644,
+      "learning_rate": 9.872004850225313e-05,
+      "loss": 0.939,
+      "step": 325
     }
   ],
   "logging_steps": 1,
       "attributes": {}
     }
   },
+  "total_flos": 3.633295808004096e+17,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null