Training in progress, step 640, checkpoint

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4d9ca924d8d90e795e86de8369f2c6d48dc26135b122cf5d4b6c82671b86a4ed
 size 97307544

 version https://git-lfs.github.com/spec/v1
+oid sha256:f8721ee43349bfc7e39016efefeb84f0798023653ef25fbfec79493feb5cad8e
 size 97307544

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:129495fdf41d513360a3ce9d47abca649f2bf128467121d3f900e1098d9d5e75
 size 49846644

 version https://git-lfs.github.com/spec/v1
+oid sha256:c0b13d732b2a8dad952ee020e87f52fea1e5a59d34717eed4f8c6474e09d2124
 size 49846644

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:098658a8b5f74b202fbb5e29652962f5112c3b574b4ce341756735375ee72208
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:3037bb99ad816203fc49047110ff2f6ec00b478885f7281330c502f80f4d07f6
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:17863c6ebccc1ff43b06a14aeb4d77d5b06180d5676f46d6243e5056d6bc48af
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:dc7648e34cd3e7e1b0e3d71e0e1fe805f3949228899d535acad963d0ca07c4a6
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.8217405370430282,
   "eval_steps": 386,
-  "global_step": 635,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -4468,6 +4468,41 @@
       "learning_rate": 9.429488861426137e-05,
       "loss": 0.8799,
       "step": 635
     }
   ],
   "logging_steps": 1,
@@ -4487,7 +4522,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 7.098901040254157e+17,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.8282109349725008,
   "eval_steps": 386,
+  "global_step": 640,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 9.429488861426137e-05,
       "loss": 0.8799,
       "step": 635
+    },
+    {
+      "epoch": 0.8230346166289226,
+      "grad_norm": 0.7446788549423218,
+      "learning_rate": 9.427574857613672e-05,
+      "loss": 0.8071,
+      "step": 636
+    },
+    {
+      "epoch": 0.8243286962148172,
+      "grad_norm": 0.8236122131347656,
+      "learning_rate": 9.425657843460288e-05,
+      "loss": 0.9889,
+      "step": 637
+    },
+    {
+      "epoch": 0.8256227758007118,
+      "grad_norm": 0.8189204335212708,
+      "learning_rate": 9.423737820269376e-05,
+      "loss": 0.9607,
+      "step": 638
+    },
+    {
+      "epoch": 0.8269168553866063,
+      "grad_norm": 0.9449727535247803,
+      "learning_rate": 9.421814789346375e-05,
+      "loss": 0.9581,
+      "step": 639
+    },
+    {
+      "epoch": 0.8282109349725008,
+      "grad_norm": 0.7527281045913696,
+      "learning_rate": 9.419888751998767e-05,
+      "loss": 0.7984,
+      "step": 640
     }
   ],
   "logging_steps": 1,
       "attributes": {}
     }
   },
+  "total_flos": 7.154797898838835e+17,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null