Training in progress, step 700, checkpoint

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b2818821d2bd85c742350df6f93ddc6ceaa5a28fbd9c64e00f2455a5ea94572d
 size 97307544

 version https://git-lfs.github.com/spec/v1
+oid sha256:1aabb06c1fe37ff25379e382b1f17082b07d34182a4fc2da6ffae8579ac4675e
 size 97307544

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e3c3d6d6a41f72e4e2daea557637ea5b684d9fb7a2db23f4008a8c201310880c
 size 49846644

 version https://git-lfs.github.com/spec/v1
+oid sha256:0d75409d4f4ef99a18e288d75b5c709cef3c171c10c32cf11b3494ebb2d324c6
 size 49846644

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c791046602634dc7125db7c8a6fa9643238ca36be61916506dd41a5029b29c2a
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:db4a5aa1d65732ea6e6ad6ffbd33a4afe19476644f2bd043f99022469dab6bc0
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:743b3e06741f3e617852c286974506bffbbfec118a8156be26ba7327d67612e1
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:e88a0b12b39fadaa49a1f55d69192330694c5d8626f92166735ca7ee1b34dd9e
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.8993853121967001,
   "eval_steps": 386,
-  "global_step": 695,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -4888,6 +4888,41 @@
       "learning_rate": 9.309365924047853e-05,
       "loss": 0.9606,
       "step": 695
     }
   ],
   "logging_steps": 1,
@@ -4907,7 +4942,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 7.769663343270298e+17,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.9058557101261727,
   "eval_steps": 386,
+  "global_step": 700,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 9.309365924047853e-05,
       "loss": 0.9606,
       "step": 695
+    },
+    {
+      "epoch": 0.9006793917825946,
+      "grad_norm": 0.7008129954338074,
+      "learning_rate": 9.307273676353432e-05,
+      "loss": 0.8531,
+      "step": 696
+    },
+    {
+      "epoch": 0.9019734713684892,
+      "grad_norm": 0.8026110529899597,
+      "learning_rate": 9.305178500111755e-05,
+      "loss": 0.7784,
+      "step": 697
+    },
+    {
+      "epoch": 0.9032675509543837,
+      "grad_norm": 0.7309970855712891,
+      "learning_rate": 9.30308039674735e-05,
+      "loss": 0.9284,
+      "step": 698
+    },
+    {
+      "epoch": 0.9045616305402783,
+      "grad_norm": 0.801511824131012,
+      "learning_rate": 9.300979367686729e-05,
+      "loss": 0.8111,
+      "step": 699
+    },
+    {
+      "epoch": 0.9058557101261727,
+      "grad_norm": 0.8487135767936707,
+      "learning_rate": 9.298875414358399e-05,
+      "loss": 0.9095,
+      "step": 700
     }
   ],
   "logging_steps": 1,
       "attributes": {}
     }
   },
+  "total_flos": 7.825560201854976e+17,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null