Training in progress, step 830, checkpoint

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:87c61933c7acb261c5801449d983e244cb4ae8e4a22df14ff59f4f35405beffe
 size 97307544

 version https://git-lfs.github.com/spec/v1
+oid sha256:2c97f065b63924129e4c663d454d849b99fe21c9944aa43d8df3d63a14aac99d
 size 97307544

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:455f6d8dbeea414678dbb91fa9f32b887be77fc737e8ea0e996b1a93a88b7851
 size 49846644

 version https://git-lfs.github.com/spec/v1
+oid sha256:08f9ba6c10be2f0554bf200b13df21893e6b5e45fcbf74b40417cfc144aea9fe
 size 49846644

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:89cfdc62760b08909a8de6668672f514712e7b15d9dc0aff582b1601f05bbba2
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:8e4587789d75916daa4e015c3e1d3b012bb77220cf17f2065c98733029f501f8
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e53509185c72a1d7e392d56b37c619974763a56a864cc85234562e53961497e3
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:904b9d02e88bf351afd7f5ee6e6ab8b6ff7fc6380ccab1f4d6df6695877c352d
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 1.0676156583629894,
   "eval_steps": 386,
-  "global_step": 825,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -5806,6 +5806,41 @@
       "learning_rate": 9.013349813901859e-05,
       "loss": 0.6574,
       "step": 825
     }
   ],
   "logging_steps": 1,
@@ -5825,7 +5860,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 9.222282955739628e+17,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 1.074086056292462,
   "eval_steps": 386,
+  "global_step": 830,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 9.013349813901859e-05,
       "loss": 0.6574,
       "step": 825
+    },
+    {
+      "epoch": 1.0689097379488839,
+      "grad_norm": 0.9867215752601624,
+      "learning_rate": 9.010889500317294e-05,
+      "loss": 0.8753,
+      "step": 826
+    },
+    {
+      "epoch": 1.0702038175347783,
+      "grad_norm": 0.9969760775566101,
+      "learning_rate": 9.008426459699269e-05,
+      "loss": 0.878,
+      "step": 827
+    },
+    {
+      "epoch": 1.071497897120673,
+      "grad_norm": 0.971978485584259,
+      "learning_rate": 9.005960693722422e-05,
+      "loss": 0.8938,
+      "step": 828
+    },
+    {
+      "epoch": 1.0727919767065675,
+      "grad_norm": 0.9560033679008484,
+      "learning_rate": 9.003492204063247e-05,
+      "loss": 0.7956,
+      "step": 829
+    },
+    {
+      "epoch": 1.074086056292462,
+      "grad_norm": 0.8045957088470459,
+      "learning_rate": 9.001020992400087e-05,
+      "loss": 0.7999,
+      "step": 830
     }
   ],
   "logging_steps": 1,
       "attributes": {}
     }
   },
+  "total_flos": 9.278179814324306e+17,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null