Training in progress, step 395, checkpoint

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b63a9540fa7412ca48380046bfa8cfe6ba36c7a222a8f8ddd5253942cfdf523a
 size 97307544

 version https://git-lfs.github.com/spec/v1
+oid sha256:aebe659d2f76c3b6d0004991e20f559679d6cf72d52e6cf39d03f3a1993703e4
 size 97307544

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:76f198036b0c9f1dbf756dfc785905c4dedca2e73a89d2c7e455140f93d15c9a
 size 49846644

 version https://git-lfs.github.com/spec/v1
+oid sha256:ae61259f2e63dcfe7725038bb24c483967928f6ef0d982a8f8cd2b593a116162
 size 49846644

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:62f9db20d138e666739444e689769719e8c52e45b26e4605c90eb0139d43c213
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:ba118175917d0b3967d192cc434ab0812fa81db5e2563d542f010b46a09a3fbb
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:85cca89790b4363b0104a6549c66e609ee25936e2ae1c6e763e441ae3b5c53ff
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:f6bf5e612f4fd4a32693b4794e80a1434e64b43319b927beb253a75b7d3b07a4
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.5046910384988677,
   "eval_steps": 386,
-  "global_step": 390,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -2753,6 +2753,41 @@
       "learning_rate": 9.804790369135718e-05,
       "loss": 0.8657,
       "step": 390
     }
   ],
   "logging_steps": 1,
@@ -2772,7 +2807,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 4.359954969604915e+17,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.5111614364283403,
   "eval_steps": 386,
+  "global_step": 395,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 9.804790369135718e-05,
       "loss": 0.8657,
       "step": 390
+    },
+    {
+      "epoch": 0.5059851180847622,
+      "grad_norm": 0.8373358845710754,
+      "learning_rate": 9.80364797483707e-05,
+      "loss": 0.9175,
+      "step": 391
+    },
+    {
+      "epoch": 0.5072791976706568,
+      "grad_norm": 0.8288902640342712,
+      "learning_rate": 9.802502314502607e-05,
+      "loss": 0.7463,
+      "step": 392
+    },
+    {
+      "epoch": 0.5085732772565513,
+      "grad_norm": 0.6780114769935608,
+      "learning_rate": 9.801353388911269e-05,
+      "loss": 0.7973,
+      "step": 393
+    },
+    {
+      "epoch": 0.5098673568424458,
+      "grad_norm": 0.9328367710113525,
+      "learning_rate": 9.800201198844221e-05,
+      "loss": 1.0405,
+      "step": 394
+    },
+    {
+      "epoch": 0.5111614364283403,
+      "grad_norm": 0.9010327458381653,
+      "learning_rate": 9.799045745084847e-05,
+      "loss": 1.1194,
+      "step": 395
     }
   ],
   "logging_steps": 1,
       "attributes": {}
     }
   },
+  "total_flos": 4.4158518281895936e+17,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null