Training in progress, step 825, checkpoint

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b3c71fa0cb7e157841e2952c317cdfa017d9a0e7e32737874f4d20b71e23615d
 size 97307544

 version https://git-lfs.github.com/spec/v1
+oid sha256:87c61933c7acb261c5801449d983e244cb4ae8e4a22df14ff59f4f35405beffe
 size 97307544

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5d95a62ba3aba13f7257482e49d433101f269255a12dfe0be4d7ed42a74d3250
 size 49846644

 version https://git-lfs.github.com/spec/v1
+oid sha256:455f6d8dbeea414678dbb91fa9f32b887be77fc737e8ea0e996b1a93a88b7851
 size 49846644

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9527a9656bdadb0a16bb79de1cfddac3292bebf2eb8fa1acb5151a1acad7acff
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:89cfdc62760b08909a8de6668672f514712e7b15d9dc0aff582b1601f05bbba2
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:fdc9d39e07b36aad394340652fa0d56c4ee44db94279b0fd69ca40c5c2e0621f
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:e53509185c72a1d7e392d56b37c619974763a56a864cc85234562e53961497e3
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 1.0611452604335168,
   "eval_steps": 386,
-  "global_step": 820,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -5771,6 +5771,41 @@
       "learning_rate": 9.025610417840237e-05,
       "loss": 0.781,
       "step": 820
     }
   ],
   "logging_steps": 1,
@@ -5790,7 +5825,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 9.166386097154949e+17,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 1.0676156583629894,
   "eval_steps": 386,
+  "global_step": 825,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 9.025610417840237e-05,
       "loss": 0.781,
       "step": 820
+    },
+    {
+      "epoch": 1.0624393400194112,
+      "grad_norm": 0.8441341519355774,
+      "learning_rate": 9.023163764477636e-05,
+      "loss": 0.7636,
+      "step": 821
+    },
+    {
+      "epoch": 1.0637334196053057,
+      "grad_norm": 0.858167290687561,
+      "learning_rate": 9.02071437573621e-05,
+      "loss": 0.6853,
+      "step": 822
+    },
+    {
+      "epoch": 1.0650274991912003,
+      "grad_norm": 0.9035941958427429,
+      "learning_rate": 9.01826225328132e-05,
+      "loss": 0.7757,
+      "step": 823
+    },
+    {
+      "epoch": 1.0663215787770948,
+      "grad_norm": 0.8677383065223694,
+      "learning_rate": 9.015807398780177e-05,
+      "loss": 0.917,
+      "step": 824
+    },
+    {
+      "epoch": 1.0676156583629894,
+      "grad_norm": 0.8263882994651794,
+      "learning_rate": 9.013349813901859e-05,
+      "loss": 0.6574,
+      "step": 825
     }
   ],
   "logging_steps": 1,
       "attributes": {}
     }
   },
+  "total_flos": 9.222282955739628e+17,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null