Training in progress, step 460, checkpoint

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b043cd7a58160e6996c90fab1beb1517bb2613e4b0999b16dca09195ed9daad8
 size 97307544

 version https://git-lfs.github.com/spec/v1
+oid sha256:6a8e766c93c4cb51d235429ab576d2269c0fe74ccc12cbec07722e592fc31d83
 size 97307544

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:fa5d29c4411fdeb35a1244b024f4c5c0c57180230285a448b1e43fc79ce803fc
 size 49846644

 version https://git-lfs.github.com/spec/v1
+oid sha256:e9f249a9f18d595322f53870ff656ff9f0fb190feac83ed8e8e07c86b79d402d
 size 49846644

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:450d4f0e3a0bdbd8c489459f618a362a3c0456d0f149f06ab048c544e79c9d17
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:f4d5ea361bd8f109ccf9deae94ad5b06097316390b345e2c5b8dfcae47e6460a
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4ef7e9caf74d940d149631b7d434e8f3ed3d9f20dbfe42784c1a25924da5d43b
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:702e6ac2a5684998fe08dbec2461764b61e07652058ef3b71a2777c5464d9e27
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.5888062115820123,
   "eval_steps": 386,
-  "global_step": 455,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -3208,6 +3208,41 @@
       "learning_rate": 9.72377690802055e-05,
       "loss": 0.9959,
       "step": 455
     }
   ],
   "logging_steps": 1,
@@ -3227,7 +3262,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 5.0866141312057344e+17,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.595276609511485,
   "eval_steps": 386,
+  "global_step": 460,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 9.72377690802055e-05,
       "loss": 0.9959,
       "step": 455
+    },
+    {
+      "epoch": 0.5901002911679069,
+      "grad_norm": 0.8143338561058044,
+      "learning_rate": 9.722423938195922e-05,
+      "loss": 0.9954,
+      "step": 456
+    },
+    {
+      "epoch": 0.5913943707538014,
+      "grad_norm": 0.6839838027954102,
+      "learning_rate": 9.721067757560303e-05,
+      "loss": 0.7288,
+      "step": 457
+    },
+    {
+      "epoch": 0.5926884503396959,
+      "grad_norm": 0.8407920598983765,
+      "learning_rate": 9.719708367035767e-05,
+      "loss": 0.858,
+      "step": 458
+    },
+    {
+      "epoch": 0.5939825299255904,
+      "grad_norm": 0.8388239741325378,
+      "learning_rate": 9.718345767546576e-05,
+      "loss": 0.8455,
+      "step": 459
+    },
+    {
+      "epoch": 0.595276609511485,
+      "grad_norm": 0.7476726770401001,
+      "learning_rate": 9.716979960019173e-05,
+      "loss": 0.8261,
+      "step": 460
     }
   ],
   "logging_steps": 1,
       "attributes": {}
     }
   },
+  "total_flos": 5.142510989790413e+17,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null