Training in progress, step 870, checkpoint

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:82b7ea91db0b8414ee1ee370f896bbd26f3fbb47a73176b7f3f65b18d197a25b
 size 97307544

 version https://git-lfs.github.com/spec/v1
+oid sha256:91bc801b9c6caba8d2de27b5b887e4de8b7defb87eb66b3cacc1b8ec971f50b0
 size 97307544

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:43083f322054aff43bc6930fa6998ada6fdd635d44556e37099a3e09e1044dd6
 size 49846644

 version https://git-lfs.github.com/spec/v1
+oid sha256:f71c68db9cf60853128c313c3b9d8c9cfd204245155f1a9dec9e7c0c970a90f1
 size 49846644

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d62a615819a68a35523082dd9af5336db4b9184e8270ea5a330d6ceefb606b95
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:213b6578b38160a37abb0e775f318cf97620a9fbc0b45491cba9a0e1b773ff55
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:fc3d8a2771961fb52eec0bdd20d20676362c5357acba011a4424ecf88c9a6ca9
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:e1fea4e1f6790394bab44f5b295cba869de89257525462490220ac4d8ec4fec6
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 1.1193788417987707,
   "eval_steps": 386,
-  "global_step": 865,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -6086,6 +6086,41 @@
       "learning_rate": 8.912826898448561e-05,
       "loss": 0.6884,
       "step": 865
     }
   ],
   "logging_steps": 1,
@@ -6105,7 +6140,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 9.669457824417055e+17,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 1.1258492397282434,
   "eval_steps": 386,
+  "global_step": 870,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 8.912826898448561e-05,
       "loss": 0.6884,
       "step": 865
+    },
+    {
+      "epoch": 1.1206729213846651,
+      "grad_norm": 0.8322413563728333,
+      "learning_rate": 8.91025882794697e-05,
+      "loss": 0.5871,
+      "step": 866
+    },
+    {
+      "epoch": 1.1219670009705598,
+      "grad_norm": 0.9673650860786438,
+      "learning_rate": 8.907688098831454e-05,
+      "loss": 0.9124,
+      "step": 867
+    },
+    {
+      "epoch": 1.1232610805564542,
+      "grad_norm": 1.0415488481521606,
+      "learning_rate": 8.905114712849875e-05,
+      "loss": 0.7218,
+      "step": 868
+    },
+    {
+      "epoch": 1.1245551601423487,
+      "grad_norm": 0.8599552512168884,
+      "learning_rate": 8.902538671751897e-05,
+      "loss": 0.8724,
+      "step": 869
+    },
+    {
+      "epoch": 1.1258492397282434,
+      "grad_norm": 0.8935772180557251,
+      "learning_rate": 8.899959977288987e-05,
+      "loss": 0.7266,
+      "step": 870
     }
   ],
   "logging_steps": 1,
       "attributes": {}
     }
   },
+  "total_flos": 9.725354683001733e+17,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null