Training in progress, step 340, checkpoint

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d5a88ed30b979e8b79a0b8e0587def94ba339e1948a5407035566744f99c4d5d
 size 97307544

 version https://git-lfs.github.com/spec/v1
+oid sha256:5c82180f14717620e0bf756c85b583676dc7d69114368727db88aae103d64aa3
 size 97307544

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0fb839e71f2559609681b6b808413e24fd0cd168e4b4c062feff2eeb23eeb240
 size 49846644

 version https://git-lfs.github.com/spec/v1
+oid sha256:d94f089a1e467259428315e6e4ad483b5faaba1f2396f78d9eb6bf56641de7b9
 size 49846644

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:07fa940b7237013ec0cf4dbb080091b3cbff44f1b6dbb14ecfef39a6acf258de
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:fe6a98cf43c9788166599038375eb01cf5493b9863f02eb9463a77c771ad5463
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:dee82665dfa8e81a745327347013c931be0b2da410ed681c21a036a30f1549b1
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:5097543f48561dead5ef0816caa212645fb361fee6d0137dbeaac39adc3b6ec8
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.43351666127466837,
   "eval_steps": 386,
-  "global_step": 335,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -2360,6 +2360,41 @@
       "learning_rate": 9.862570513027735e-05,
       "loss": 0.9637,
       "step": 335
     }
   ],
   "logging_steps": 1,
@@ -2379,7 +2414,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 3.745089525173453e+17,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.43998705920414105,
   "eval_steps": 386,
+  "global_step": 340,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 9.862570513027735e-05,
       "loss": 0.9637,
       "step": 335
+    },
+    {
+      "epoch": 0.4348107408605629,
+      "grad_norm": 0.8372804522514343,
+      "learning_rate": 9.861608885094012e-05,
+      "loss": 0.8609,
+      "step": 336
+    },
+    {
+      "epoch": 0.43610482044645743,
+      "grad_norm": 0.8712325096130371,
+      "learning_rate": 9.860643951716421e-05,
+      "loss": 0.9718,
+      "step": 337
+    },
+    {
+      "epoch": 0.437398900032352,
+      "grad_norm": 0.9869045615196228,
+      "learning_rate": 9.859675713551028e-05,
+      "loss": 0.887,
+      "step": 338
+    },
+    {
+      "epoch": 0.43869297961824655,
+      "grad_norm": 0.9166460037231445,
+      "learning_rate": 9.858704171256145e-05,
+      "loss": 1.0751,
+      "step": 339
+    },
+    {
+      "epoch": 0.43998705920414105,
+      "grad_norm": 1.1965091228485107,
+      "learning_rate": 9.857729325492329e-05,
+      "loss": 1.0093,
+      "step": 340
     }
   ],
   "logging_steps": 1,
       "attributes": {}
     }
   },
+  "total_flos": 3.800986383758131e+17,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null