Training in progress, step 375, checkpoint

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3feb049f84caa6e45dce230afc9473e26f98520e4afa70a37072f4ef66b84c97
 size 97307544

 version https://git-lfs.github.com/spec/v1
+oid sha256:661491863db880d889462331cf28f24df2f6df70492c034b40f718f3f65523bb
 size 97307544

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2f0d3ec45aa2db7e380044bf42b2b85a321f290d5cd471c615808e98ec9617ea
 size 49846644

 version https://git-lfs.github.com/spec/v1
+oid sha256:32a154b794f3a0235100d8e5790fb4656d040d6aabf01fddc63b24d541ce7141
 size 49846644

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:adca6753b49430d8d85732dd961bf4e168c9f525f80ba2505ec99cb6254ea83c
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:0002e057fedb4f60b1b185f2a74688f5fb1a0b57ce953ac999f33226551711f0
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d671199ea99b2115775ea1b1182417de6788e71b7fb0dc4b7b1ebf77612cd032
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:035a0df081c185ccdc3573767b609ca409aa3c3d9e7594dbbc7218f373a1074d
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.47880944678097703,
   "eval_steps": 386,
-  "global_step": 370,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -2605,6 +2605,41 @@
       "learning_rate": 9.826951207696258e-05,
       "loss": 0.8781,
       "step": 370
     }
   ],
   "logging_steps": 1,
@@ -2624,7 +2659,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 4.1363675352662016e+17,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.4852798447104497,
   "eval_steps": 386,
+  "global_step": 375,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 9.826951207696258e-05,
       "loss": 0.8781,
       "step": 370
+    },
+    {
+      "epoch": 0.48010352636687154,
+      "grad_norm": 0.959077775478363,
+      "learning_rate": 9.825874294268396e-05,
+      "loss": 0.9448,
+      "step": 371
+    },
+    {
+      "epoch": 0.4813976059527661,
+      "grad_norm": 0.7479959726333618,
+      "learning_rate": 9.824794099692878e-05,
+      "loss": 0.8608,
+      "step": 372
+    },
+    {
+      "epoch": 0.48269168553866065,
+      "grad_norm": 0.8064723014831543,
+      "learning_rate": 9.823710624704137e-05,
+      "loss": 1.1222,
+      "step": 373
+    },
+    {
+      "epoch": 0.48398576512455516,
+      "grad_norm": 0.8007084131240845,
+      "learning_rate": 9.822623870038838e-05,
+      "loss": 0.8967,
+      "step": 374
+    },
+    {
+      "epoch": 0.4852798447104497,
+      "grad_norm": 0.988571286201477,
+      "learning_rate": 9.82153383643587e-05,
+      "loss": 0.9574,
+      "step": 375
     }
   ],
   "logging_steps": 1,
       "attributes": {}
     }
   },
+  "total_flos": 4.19226439385088e+17,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null