Training in progress, step 200, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +364 -6

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:646286d7b4ac468f622c856ba141a9b4d5fb1685f0b5f62c1c8cab315120ced8
 size 671149168

 version https://git-lfs.github.com/spec/v1
+oid sha256:ff1095da7b7974a7118261d75470d0bf8e8213b8f8c185516c9d6faaec6a986b
 size 671149168

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0b0e8ad4f31bfd16a51a2b8c4bbef5e7090efea35ff4ba25f49dc76c4766436e
 size 1342555602

 version https://git-lfs.github.com/spec/v1
+oid sha256:95086dc013ab5d33211ad1b2ba63f886bfa96a7000f171863cf718148f013862
 size 1342555602

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3aeec98de2a543bc4b433146bd590a4898fabeb53c61c1909eb8b961873d4ab0
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:ddde689d9d95eb07edcbeef86a46db0ec680fdbc60ee304591b8be2d39b91530
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:dfd59dca009004df561617f8f6994512d029a952a68609cac24b36df5a0757ce
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:d2d754412c61116546142914503e7369d0cc35d3c380a07e5218f595d76b6d96
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 1.7135263681411743,
-  "best_model_checkpoint": "miner_id_24/checkpoint-150",
-  "epoch": 0.1936108422071636,
   "eval_steps": 50,
-  "global_step": 150,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1089,6 +1089,364 @@
       "eval_samples_per_second": 12.287,
       "eval_steps_per_second": 6.148,
       "step": 150
     }
   ],
   "logging_steps": 1,
@@ -1112,12 +1470,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 2.262770368118784e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 1.692399263381958,
+  "best_model_checkpoint": "miner_id_24/checkpoint-200",
+  "epoch": 0.2581477896095515,
   "eval_steps": 50,
+  "global_step": 200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 12.287,
       "eval_steps_per_second": 6.148,
       "step": 150
+    },
+    {
+      "epoch": 0.19490158115521136,
+      "grad_norm": 1.852188229560852,
+      "learning_rate": 1.9136088935510362e-05,
+      "loss": 1.5323,
+      "step": 151
+    },
+    {
+      "epoch": 0.19619232010325913,
+      "grad_norm": 1.9895038604736328,
+      "learning_rate": 1.8414449687337464e-05,
+      "loss": 1.5872,
+      "step": 152
+    },
+    {
+      "epoch": 0.19748305905130686,
+      "grad_norm": 2.1691086292266846,
+      "learning_rate": 1.7703596875660645e-05,
+      "loss": 1.5141,
+      "step": 153
+    },
+    {
+      "epoch": 0.19877379799935463,
+      "grad_norm": 2.243191957473755,
+      "learning_rate": 1.700377325606388e-05,
+      "loss": 1.5441,
+      "step": 154
+    },
+    {
+      "epoch": 0.2000645369474024,
+      "grad_norm": 2.0653717517852783,
+      "learning_rate": 1.631521781767214e-05,
+      "loss": 1.3843,
+      "step": 155
+    },
+    {
+      "epoch": 0.20135527589545016,
+      "grad_norm": 1.9524214267730713,
+      "learning_rate": 1.5638165701536868e-05,
+      "loss": 1.6269,
+      "step": 156
+    },
+    {
+      "epoch": 0.2026460148434979,
+      "grad_norm": 2.0470457077026367,
+      "learning_rate": 1.4972848120335453e-05,
+      "loss": 1.7161,
+      "step": 157
+    },
+    {
+      "epoch": 0.20393675379154566,
+      "grad_norm": 1.9399360418319702,
+      "learning_rate": 1.4319492279412388e-05,
+      "loss": 1.388,
+      "step": 158
+    },
+    {
+      "epoch": 0.20522749273959343,
+      "grad_norm": 2.0418624877929688,
+      "learning_rate": 1.3678321299188801e-05,
+      "loss": 1.5441,
+      "step": 159
+    },
+    {
+      "epoch": 0.20651823168764116,
+      "grad_norm": 2.3883256912231445,
+      "learning_rate": 1.3049554138967051e-05,
+      "loss": 1.333,
+      "step": 160
+    },
+    {
+      "epoch": 0.20780897063568893,
+      "grad_norm": 2.139054298400879,
+      "learning_rate": 1.2433405522156332e-05,
+      "loss": 1.5149,
+      "step": 161
+    },
+    {
+      "epoch": 0.2090997095837367,
+      "grad_norm": 2.215787410736084,
+      "learning_rate": 1.183008586294485e-05,
+      "loss": 1.5158,
+      "step": 162
+    },
+    {
+      "epoch": 0.21039044853178446,
+      "grad_norm": 2.343177318572998,
+      "learning_rate": 1.1239801194443506e-05,
+      "loss": 1.5693,
+      "step": 163
+    },
+    {
+      "epoch": 0.2116811874798322,
+      "grad_norm": 2.1910648345947266,
+      "learning_rate": 1.066275309832584e-05,
+      "loss": 1.557,
+      "step": 164
+    },
+    {
+      "epoch": 0.21297192642787996,
+      "grad_norm": 1.98995041847229,
+      "learning_rate": 1.0099138635988026e-05,
+      "loss": 1.7223,
+      "step": 165
+    },
+    {
+      "epoch": 0.21426266537592772,
+      "grad_norm": 2.28912091255188,
+      "learning_rate": 9.549150281252633e-06,
+      "loss": 1.565,
+      "step": 166
+    },
+    {
+      "epoch": 0.2155534043239755,
+      "grad_norm": 2.5017130374908447,
+      "learning_rate": 9.012975854638949e-06,
+      "loss": 1.6143,
+      "step": 167
+    },
+    {
+      "epoch": 0.21684414327202323,
+      "grad_norm": 2.1927993297576904,
+      "learning_rate": 8.490798459222476e-06,
+      "loss": 1.4325,
+      "step": 168
+    },
+    {
+      "epoch": 0.218134882220071,
+      "grad_norm": 2.1347038745880127,
+      "learning_rate": 7.982796418105371e-06,
+      "loss": 1.4508,
+      "step": 169
+    },
+    {
+      "epoch": 0.21942562116811876,
+      "grad_norm": 2.1405744552612305,
+      "learning_rate": 7.489143213519301e-06,
+      "loss": 1.4132,
+      "step": 170
+    },
+    {
+      "epoch": 0.2207163601161665,
+      "grad_norm": 2.3374786376953125,
+      "learning_rate": 7.010007427581378e-06,
+      "loss": 1.4786,
+      "step": 171
+    },
+    {
+      "epoch": 0.22200709906421426,
+      "grad_norm": 2.3734936714172363,
+      "learning_rate": 6.5455526847235825e-06,
+      "loss": 1.4979,
+      "step": 172
+    },
+    {
+      "epoch": 0.22329783801226202,
+      "grad_norm": 2.0683765411376953,
+      "learning_rate": 6.0959375958151045e-06,
+      "loss": 1.5925,
+      "step": 173
+    },
+    {
+      "epoch": 0.2245885769603098,
+      "grad_norm": 2.404156446456909,
+      "learning_rate": 5.6613157039969055e-06,
+      "loss": 1.7332,
+      "step": 174
+    },
+    {
+      "epoch": 0.22587931590835753,
+      "grad_norm": 2.4572064876556396,
+      "learning_rate": 5.241835432246889e-06,
+      "loss": 1.545,
+      "step": 175
+    },
+    {
+      "epoch": 0.2271700548564053,
+      "grad_norm": 2.286088228225708,
+      "learning_rate": 4.837640032693558e-06,
+      "loss": 1.3256,
+      "step": 176
+    },
+    {
+      "epoch": 0.22846079380445306,
+      "grad_norm": 2.752758741378784,
+      "learning_rate": 4.448867537695578e-06,
+      "loss": 1.5806,
+      "step": 177
+    },
+    {
+      "epoch": 0.22975153275250082,
+      "grad_norm": 2.5368597507476807,
+      "learning_rate": 4.075650712703849e-06,
+      "loss": 1.809,
+      "step": 178
+    },
+    {
+      "epoch": 0.23104227170054856,
+      "grad_norm": 2.4749534130096436,
+      "learning_rate": 3.71811701092219e-06,
+      "loss": 1.5642,
+      "step": 179
+    },
+    {
+      "epoch": 0.23233301064859632,
+      "grad_norm": 2.5066912174224854,
+      "learning_rate": 3.376388529782215e-06,
+      "loss": 1.523,
+      "step": 180
+    },
+    {
+      "epoch": 0.2336237495966441,
+      "grad_norm": 2.532597780227661,
+      "learning_rate": 3.0505819692471792e-06,
+      "loss": 1.6864,
+      "step": 181
+    },
+    {
+      "epoch": 0.23491448854469182,
+      "grad_norm": 2.580227851867676,
+      "learning_rate": 2.7408085919590264e-06,
+      "loss": 1.2414,
+      "step": 182
+    },
+    {
+      "epoch": 0.2362052274927396,
+      "grad_norm": 2.5495617389678955,
+      "learning_rate": 2.4471741852423237e-06,
+      "loss": 1.804,
+      "step": 183
+    },
+    {
+      "epoch": 0.23749596644078735,
+      "grad_norm": 2.505260944366455,
+      "learning_rate": 2.1697790249779636e-06,
+      "loss": 1.558,
+      "step": 184
+    },
+    {
+      "epoch": 0.23878670538883512,
+      "grad_norm": 2.6142818927764893,
+      "learning_rate": 1.908717841359048e-06,
+      "loss": 1.9136,
+      "step": 185
+    },
+    {
+      "epoch": 0.24007744433688286,
+      "grad_norm": 2.6882243156433105,
+      "learning_rate": 1.6640797865406288e-06,
+      "loss": 1.8068,
+      "step": 186
+    },
+    {
+      "epoch": 0.24136818328493062,
+      "grad_norm": 2.3261289596557617,
+      "learning_rate": 1.4359484041943038e-06,
+      "loss": 1.7333,
+      "step": 187
+    },
+    {
+      "epoch": 0.24265892223297839,
+      "grad_norm": 2.6675865650177,
+      "learning_rate": 1.2244016009781701e-06,
+      "loss": 1.7487,
+      "step": 188
+    },
+    {
+      "epoch": 0.24394966118102615,
+      "grad_norm": 2.6144895553588867,
+      "learning_rate": 1.0295116199317057e-06,
+      "loss": 1.5506,
+      "step": 189
+    },
+    {
+      "epoch": 0.2452404001290739,
+      "grad_norm": 2.8195700645446777,
+      "learning_rate": 8.513450158049108e-07,
+      "loss": 2.0802,
+      "step": 190
+    },
+    {
+      "epoch": 0.24653113907712165,
+      "grad_norm": 2.959312677383423,
+      "learning_rate": 6.899626323298713e-07,
+      "loss": 2.1612,
+      "step": 191
+    },
+    {
+      "epoch": 0.24782187802516942,
+      "grad_norm": 3.211810350418091,
+      "learning_rate": 5.454195814427021e-07,
+      "loss": 1.6953,
+      "step": 192
+    },
+    {
+      "epoch": 0.24911261697321715,
+      "grad_norm": 2.8168299198150635,
+      "learning_rate": 4.177652244628627e-07,
+      "loss": 1.7243,
+      "step": 193
+    },
+    {
+      "epoch": 0.25040335592126495,
+      "grad_norm": 3.130859613418579,
+      "learning_rate": 3.0704315523631953e-07,
+      "loss": 2.1074,
+      "step": 194
+    },
+    {
+      "epoch": 0.25169409486931266,
+      "grad_norm": 3.0134541988372803,
+      "learning_rate": 2.1329118524827662e-07,
+      "loss": 1.9204,
+      "step": 195
+    },
+    {
+      "epoch": 0.2529848338173604,
+      "grad_norm": 2.870702028274536,
+      "learning_rate": 1.3654133071059893e-07,
+      "loss": 1.7521,
+      "step": 196
+    },
+    {
+      "epoch": 0.2542755727654082,
+      "grad_norm": 3.4485554695129395,
+      "learning_rate": 7.681980162830282e-08,
+      "loss": 2.0447,
+      "step": 197
+    },
+    {
+      "epoch": 0.25556631171345595,
+      "grad_norm": 3.369616746902466,
+      "learning_rate": 3.4146992848854695e-08,
+      "loss": 2.2807,
+      "step": 198
+    },
+    {
+      "epoch": 0.2568570506615037,
+      "grad_norm": 3.441540479660034,
+      "learning_rate": 8.537477097364522e-09,
+      "loss": 2.1742,
+      "step": 199
+    },
+    {
+      "epoch": 0.2581477896095515,
+      "grad_norm": 5.625406742095947,
+      "learning_rate": 0.0,
+      "loss": 2.6312,
+      "step": 200
+    },
+    {
+      "epoch": 0.2581477896095515,
+      "eval_loss": 1.692399263381958,
+      "eval_runtime": 106.0797,
+      "eval_samples_per_second": 12.302,
+      "eval_steps_per_second": 6.156,
+      "step": 200
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 3.017027157491712e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null