Training in progress, step 150000

Browse files

Files changed (13) hide show

last-checkpoint/optimizer.pt +1 -1
last-checkpoint/pytorch_model.bin +1 -1
last-checkpoint/rng_state_0.pth +1 -1
last-checkpoint/rng_state_1.pth +1 -1
last-checkpoint/rng_state_2.pth +1 -1
last-checkpoint/rng_state_3.pth +1 -1
last-checkpoint/rng_state_4.pth +1 -1
last-checkpoint/rng_state_5.pth +1 -1
last-checkpoint/rng_state_6.pth +1 -1
last-checkpoint/rng_state_7.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +203 -3
pytorch_model.bin +1 -1

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:878ac2af256d90283abe99c8603dab07e40eb73da1c3655fc21a49086d6f8483
 size 50044689

 version https://git-lfs.github.com/spec/v1
+oid sha256:115ffe163e25d965deaf1e95234c158ffd00fbc71531098c0b9ab43a32b44422
 size 50044689

last-checkpoint/pytorch_model.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:34c22557eb08501f7a24373ea155511c565e71575e567e7ff811308f57ab5e12
 size 25761253

 version https://git-lfs.github.com/spec/v1
+oid sha256:9f5f5ae4e2c1f14e613311f65c9deaf0995f26ac33b108067f68da4920fbd182
 size 25761253

last-checkpoint/rng_state_0.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1e2c0aada9b191e5f2d821af5ad22cc6daf3753d3e556038bf559d65cf4419b6
 size 14503

 version https://git-lfs.github.com/spec/v1
+oid sha256:970f5edc45b72267b07b2b29a2f000865ed4d4ecdc6c514dcf59f57e03814ca9
 size 14503

last-checkpoint/rng_state_1.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1e2c0aada9b191e5f2d821af5ad22cc6daf3753d3e556038bf559d65cf4419b6
 size 14503

 version https://git-lfs.github.com/spec/v1
+oid sha256:970f5edc45b72267b07b2b29a2f000865ed4d4ecdc6c514dcf59f57e03814ca9
 size 14503

last-checkpoint/rng_state_2.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1e2c0aada9b191e5f2d821af5ad22cc6daf3753d3e556038bf559d65cf4419b6
 size 14503

 version https://git-lfs.github.com/spec/v1
+oid sha256:970f5edc45b72267b07b2b29a2f000865ed4d4ecdc6c514dcf59f57e03814ca9
 size 14503

last-checkpoint/rng_state_3.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1e2c0aada9b191e5f2d821af5ad22cc6daf3753d3e556038bf559d65cf4419b6
 size 14503

 version https://git-lfs.github.com/spec/v1
+oid sha256:970f5edc45b72267b07b2b29a2f000865ed4d4ecdc6c514dcf59f57e03814ca9
 size 14503

last-checkpoint/rng_state_4.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1e2c0aada9b191e5f2d821af5ad22cc6daf3753d3e556038bf559d65cf4419b6
 size 14503

 version https://git-lfs.github.com/spec/v1
+oid sha256:970f5edc45b72267b07b2b29a2f000865ed4d4ecdc6c514dcf59f57e03814ca9
 size 14503

last-checkpoint/rng_state_5.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1e2c0aada9b191e5f2d821af5ad22cc6daf3753d3e556038bf559d65cf4419b6
 size 14503

 version https://git-lfs.github.com/spec/v1
+oid sha256:970f5edc45b72267b07b2b29a2f000865ed4d4ecdc6c514dcf59f57e03814ca9
 size 14503

last-checkpoint/rng_state_6.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1e2c0aada9b191e5f2d821af5ad22cc6daf3753d3e556038bf559d65cf4419b6
 size 14503

 version https://git-lfs.github.com/spec/v1
+oid sha256:970f5edc45b72267b07b2b29a2f000865ed4d4ecdc6c514dcf59f57e03814ca9
 size 14503

last-checkpoint/rng_state_7.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1e2c0aada9b191e5f2d821af5ad22cc6daf3753d3e556038bf559d65cf4419b6
 size 14503

 version https://git-lfs.github.com/spec/v1
+oid sha256:970f5edc45b72267b07b2b29a2f000865ed4d4ecdc6c514dcf59f57e03814ca9
 size 14503

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ed68d92642b5c57649c135331b8243d8047b1dee7f4eb5f6f68f9dc4d2f32821
 size 623

 version https://git-lfs.github.com/spec/v1
+oid sha256:684030441e546f328363202be7e7a1e6d60b5494506eab9e81487ca712343e2e
 size 623

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,8 +1,8 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 8.57843137254902,
-  "global_step": 140000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -2806,11 +2806,211 @@
       "eval_samples_per_second": 777.265,
       "eval_steps_per_second": 12.436,
       "step": 140000
     }
   ],
   "max_steps": 250000,
   "num_train_epochs": 16,
-  "total_flos": 2.2422719560923365e+21,
   "trial_name": null,
   "trial_params": null
 }

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 9.191176470588236,
+  "global_step": 150000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 777.265,
       "eval_steps_per_second": 12.436,
       "step": 140000
+    },
+    {
+      "epoch": 8.61,
+      "learning_rate": 0.00026899475121297924,
+      "loss": 0.46,
+      "step": 140500
+    },
+    {
+      "epoch": 8.64,
+      "learning_rate": 0.00026705904531820914,
+      "loss": 0.4597,
+      "step": 141000
+    },
+    {
+      "epoch": 8.64,
+      "eval_loss": 0.8088939189910889,
+      "eval_runtime": 1.2732,
+      "eval_samples_per_second": 785.451,
+      "eval_steps_per_second": 12.567,
+      "step": 141000
+    },
+    {
+      "epoch": 8.67,
+      "learning_rate": 0.0002651249990837085,
+      "loss": 0.4596,
+      "step": 141500
+    },
+    {
+      "epoch": 8.7,
+      "learning_rate": 0.00026319269711092485,
+      "loss": 0.4591,
+      "step": 142000
+    },
+    {
+      "epoch": 8.7,
+      "eval_loss": 0.8040044903755188,
+      "eval_runtime": 1.2887,
+      "eval_samples_per_second": 775.97,
+      "eval_steps_per_second": 12.416,
+      "step": 142000
+    },
+    {
+      "epoch": 8.73,
+      "learning_rate": 0.0002612622239250066,
+      "loss": 0.459,
+      "step": 142500
+    },
+    {
+      "epoch": 8.76,
+      "learning_rate": 0.0002593336639711046,
+      "loss": 0.4586,
+      "step": 143000
+    },
+    {
+      "epoch": 8.76,
+      "eval_loss": 0.7993264198303223,
+      "eval_runtime": 1.3493,
+      "eval_samples_per_second": 741.132,
+      "eval_steps_per_second": 11.858,
+      "step": 143000
+    },
+    {
+      "epoch": 8.79,
+      "learning_rate": 0.000257407101610679,
+      "loss": 0.4583,
+      "step": 143500
+    },
+    {
+      "epoch": 8.82,
+      "learning_rate": 0.00025548262111780846,
+      "loss": 0.4584,
+      "step": 144000
+    },
+    {
+      "epoch": 8.82,
+      "eval_loss": 0.8003845810890198,
+      "eval_runtime": 1.306,
+      "eval_samples_per_second": 765.701,
+      "eval_steps_per_second": 12.251,
+      "step": 144000
+    },
+    {
+      "epoch": 8.85,
+      "learning_rate": 0.0002535603066755043,
+      "loss": 0.4579,
+      "step": 144500
+    },
+    {
+      "epoch": 8.88,
+      "learning_rate": 0.00025164024237202764,
+      "loss": 0.4594,
+      "step": 145000
+    },
+    {
+      "epoch": 8.88,
+      "eval_loss": 0.7990729808807373,
+      "eval_runtime": 1.2616,
+      "eval_samples_per_second": 792.645,
+      "eval_steps_per_second": 12.682,
+      "step": 145000
+    },
+    {
+      "epoch": 8.92,
+      "learning_rate": 0.00024972251219721115,
+      "loss": 0.4573,
+      "step": 145500
+    },
+    {
+      "epoch": 8.95,
+      "learning_rate": 0.00024780720003878557,
+      "loss": 0.4574,
+      "step": 146000
+    },
+    {
+      "epoch": 8.95,
+      "eval_loss": 0.7956343293190002,
+      "eval_runtime": 1.2847,
+      "eval_samples_per_second": 778.391,
+      "eval_steps_per_second": 12.454,
+      "step": 146000
+    },
+    {
+      "epoch": 8.98,
+      "learning_rate": 0.00024589438967870925,
+      "loss": 0.4571,
+      "step": 146500
+    },
+    {
+      "epoch": 9.01,
+      "learning_rate": 0.00024398416478950394,
+      "loss": 0.4571,
+      "step": 147000
+    },
+    {
+      "epoch": 9.01,
+      "eval_loss": 0.7948459386825562,
+      "eval_runtime": 1.3733,
+      "eval_samples_per_second": 728.166,
+      "eval_steps_per_second": 11.651,
+      "step": 147000
+    },
+    {
+      "epoch": 9.04,
+      "learning_rate": 0.00024207660893059467,
+      "loss": 0.4565,
+      "step": 147500
+    },
+    {
+      "epoch": 9.07,
+      "learning_rate": 0.0002401718055446543,
+      "loss": 0.4565,
+      "step": 148000
+    },
+    {
+      "epoch": 9.07,
+      "eval_loss": 0.7982079982757568,
+      "eval_runtime": 1.249,
+      "eval_samples_per_second": 800.631,
+      "eval_steps_per_second": 12.81,
+      "step": 148000
+    },
+    {
+      "epoch": 9.1,
+      "learning_rate": 0.00023826983795395364,
+      "loss": 0.4561,
+      "step": 148500
+    },
+    {
+      "epoch": 9.13,
+      "learning_rate": 0.00023637078935671656,
+      "loss": 0.4563,
+      "step": 149000
+    },
+    {
+      "epoch": 9.13,
+      "eval_loss": 0.7960088849067688,
+      "eval_runtime": 1.2502,
+      "eval_samples_per_second": 799.9,
+      "eval_steps_per_second": 12.798,
+      "step": 149000
+    },
+    {
+      "epoch": 9.16,
+      "learning_rate": 0.00023447474282348085,
+      "loss": 0.4558,
+      "step": 149500
+    },
+    {
+      "epoch": 9.19,
+      "learning_rate": 0.00023258178129346424,
+      "loss": 0.4555,
+      "step": 150000
+    },
+    {
+      "epoch": 9.19,
+      "eval_loss": 0.8043127655982971,
+      "eval_runtime": 1.3326,
+      "eval_samples_per_second": 750.39,
+      "eval_steps_per_second": 12.006,
+      "step": 150000
     }
   ],
   "max_steps": 250000,
   "num_train_epochs": 16,
+  "total_flos": 2.4024299484545255e+21,
   "trial_name": null,
   "trial_params": null
 }

pytorch_model.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:34c22557eb08501f7a24373ea155511c565e71575e567e7ff811308f57ab5e12
 size 25761253

 version https://git-lfs.github.com/spec/v1
+oid sha256:9f5f5ae4e2c1f14e613311f65c9deaf0995f26ac33b108067f68da4920fbd182
 size 25761253