Training in progress, step 710, checkpoint

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:276cc98327297733df76553f5e3b6d4860107114f0c0d09e76850d88f2e3e792
 size 97307544

 version https://git-lfs.github.com/spec/v1
+oid sha256:a820a8f4eb542479915dcb468727c29cefc3d7359dd207ee3312c7df3c587cae
 size 97307544

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:67e1bd4b745e410efa1702b658de1ecd4ec95ec3e117bd275ec883dc33e4fe8f
 size 49846644

 version https://git-lfs.github.com/spec/v1
+oid sha256:dc0f6934bb01d904150982cb8e47c2f710c6f81fcd6bdc0736a215fffd1bf2df
 size 49846644

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c2d3d37bb63f7049bbb3b5a0e021e4d5be2c3c48a17668f2b1c66f33aefe4ecd
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:b64b4ae62e4075beb8480b1639d23fdd45af44a932132fe66540a077fcc1c087
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6f8030d97101266fb0ab5c1ea76d734a66ed4913265b7bd97b3ca554de2e26fd
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:733800b83bb7a95c51151abac1a9de03ea9b82fb261b8821470be38e62d0baca
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.9123261080556454,
   "eval_steps": 386,
-  "global_step": 705,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -4958,6 +4958,41 @@
       "learning_rate": 9.288311833837917e-05,
       "loss": 0.8883,
       "step": 705
     }
   ],
   "logging_steps": 1,
@@ -4977,7 +5012,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 7.881457060439654e+17,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.9187965059851181,
   "eval_steps": 386,
+  "global_step": 710,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 9.288311833837917e-05,
       "loss": 0.8883,
       "step": 705
+    },
+    {
+      "epoch": 0.91362018764154,
+      "grad_norm": 0.6708902716636658,
+      "learning_rate": 9.286190365013217e-05,
+      "loss": 0.8315,
+      "step": 706
+    },
+    {
+      "epoch": 0.9149142672274345,
+      "grad_norm": 0.802245557308197,
+      "learning_rate": 9.284065981975958e-05,
+      "loss": 0.9128,
+      "step": 707
+    },
+    {
+      "epoch": 0.916208346813329,
+      "grad_norm": 0.8574107885360718,
+      "learning_rate": 9.281938686170526e-05,
+      "loss": 0.9122,
+      "step": 708
+    },
+    {
+      "epoch": 0.9175024263992235,
+      "grad_norm": 1.060194730758667,
+      "learning_rate": 9.279808479043286e-05,
+      "loss": 1.1447,
+      "step": 709
+    },
+    {
+      "epoch": 0.9187965059851181,
+      "grad_norm": 0.8753707408905029,
+      "learning_rate": 9.277675362042581e-05,
+      "loss": 0.8524,
+      "step": 710
     }
   ],
   "logging_steps": 1,
       "attributes": {}
     }
   },
+  "total_flos": 7.937353919024333e+17,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null