Training in progress, step 200, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +372 -6

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7190639a8826c69a33b510d59a4f24ea9812831def055972aac00ead6ea06f20
 size 216151256

 version https://git-lfs.github.com/spec/v1
+oid sha256:4ceda968bec862696775dfdd5ec47d280adc1fcd82977a99db609da3504d1fa6
 size 216151256

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:27feb029eb5196aa48285e181df81996b66b320fc5c9743e22d5dbe348eec419
 size 432640054

 version https://git-lfs.github.com/spec/v1
+oid sha256:b32bda597d580ad6b6d45eb54387d5d940a7db119a6ad19c12d788323890abe9
 size 432640054

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2d16b64b4b9b8c374446b746ed500c6a8b4ed8edac2e1d460226f899747217e8
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:021fab17b81d17b3748f10439409c4a2987cdcb0559cbf843d1e11a55dd3de50
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e70710c409284f74d525f8db5cfaccc22a8afd29416f19c595da9242ec92d936
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:fe157715eb8e05b3bab2a7f2fafac33705dc4a1a9dd7f6d860c3a7f9597d78bb
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 0.22700335085391998,
-  "best_model_checkpoint": "miner_id_24/checkpoint-150",
-  "epoch": 0.32590983161325365,
   "eval_steps": 25,
-  "global_step": 150,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1113,6 +1113,372 @@
       "eval_samples_per_second": 5.002,
       "eval_steps_per_second": 0.7,
       "step": 150
     }
   ],
   "logging_steps": 1,
@@ -1136,12 +1502,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 2.625264977707008e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 0.00530708022415638,
+  "best_model_checkpoint": "miner_id_24/checkpoint-200",
+  "epoch": 0.4345464421510049,
   "eval_steps": 25,
+  "global_step": 200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 5.002,
       "eval_steps_per_second": 0.7,
       "step": 150
+    },
+    {
+      "epoch": 0.3280825638240087,
+      "grad_norm": 4.141190052032471,
+      "learning_rate": 4.659698863221513e-05,
+      "loss": 0.0664,
+      "step": 151
+    },
+    {
+      "epoch": 0.3302552960347637,
+      "grad_norm": 0.01306833978742361,
+      "learning_rate": 4.481448235912671e-05,
+      "loss": 0.0002,
+      "step": 152
+    },
+    {
+      "epoch": 0.33242802824551876,
+      "grad_norm": 18.984952926635742,
+      "learning_rate": 4.306073275629044e-05,
+      "loss": 1.9334,
+      "step": 153
+    },
+    {
+      "epoch": 0.33460076045627374,
+      "grad_norm": 17.12728500366211,
+      "learning_rate": 4.133621928133665e-05,
+      "loss": 1.4411,
+      "step": 154
+    },
+    {
+      "epoch": 0.3367734926670288,
+      "grad_norm": 4.665300369262695,
+      "learning_rate": 3.964141339903026e-05,
+      "loss": 0.1485,
+      "step": 155
+    },
+    {
+      "epoch": 0.3389462248777838,
+      "grad_norm": 0.299454927444458,
+      "learning_rate": 3.797677845237696e-05,
+      "loss": 0.0038,
+      "step": 156
+    },
+    {
+      "epoch": 0.34111895708853884,
+      "grad_norm": 1.3657584190368652,
+      "learning_rate": 3.634276953594982e-05,
+      "loss": 0.0076,
+      "step": 157
+    },
+    {
+      "epoch": 0.3432916892992939,
+      "grad_norm": 0.43887317180633545,
+      "learning_rate": 3.473983337147118e-05,
+      "loss": 0.0016,
+      "step": 158
+    },
+    {
+      "epoch": 0.3454644215100489,
+      "grad_norm": 0.03741230443120003,
+      "learning_rate": 3.316840818568315e-05,
+      "loss": 0.0003,
+      "step": 159
+    },
+    {
+      "epoch": 0.3476371537208039,
+      "grad_norm": 0.029207486659288406,
+      "learning_rate": 3.162892359054098e-05,
+      "loss": 0.0003,
+      "step": 160
+    },
+    {
+      "epoch": 0.34980988593155893,
+      "grad_norm": 0.021176263689994812,
+      "learning_rate": 3.0121800465761293e-05,
+      "loss": 0.0002,
+      "step": 161
+    },
+    {
+      "epoch": 0.35198261814231396,
+      "grad_norm": 0.018373854458332062,
+      "learning_rate": 2.8647450843757897e-05,
+      "loss": 0.0002,
+      "step": 162
+    },
+    {
+      "epoch": 0.354155350353069,
+      "grad_norm": 0.19128261506557465,
+      "learning_rate": 2.7206277796996144e-05,
+      "loss": 0.003,
+      "step": 163
+    },
+    {
+      "epoch": 0.35632808256382403,
+      "grad_norm": 0.18957850337028503,
+      "learning_rate": 2.5798675327796993e-05,
+      "loss": 0.0032,
+      "step": 164
+    },
+    {
+      "epoch": 0.358500814774579,
+      "grad_norm": 0.06007857620716095,
+      "learning_rate": 2.4425028260620715e-05,
+      "loss": 0.0012,
+      "step": 165
+    },
+    {
+      "epoch": 0.36067354698533405,
+      "grad_norm": 0.03031660057604313,
+      "learning_rate": 2.3085712136859668e-05,
+      "loss": 0.0006,
+      "step": 166
+    },
+    {
+      "epoch": 0.3628462791960891,
+      "grad_norm": 0.028533753007650375,
+      "learning_rate": 2.178109311216913e-05,
+      "loss": 0.0006,
+      "step": 167
+    },
+    {
+      "epoch": 0.3650190114068441,
+      "grad_norm": 0.02440357394516468,
+      "learning_rate": 2.0511527856363912e-05,
+      "loss": 0.0004,
+      "step": 168
+    },
+    {
+      "epoch": 0.36719174361759915,
+      "grad_norm": 0.01583729311823845,
+      "learning_rate": 1.927736345590839e-05,
+      "loss": 0.0003,
+      "step": 169
+    },
+    {
+      "epoch": 0.36936447582835413,
+      "grad_norm": 0.04840531945228577,
+      "learning_rate": 1.8078937319026654e-05,
+      "loss": 0.0004,
+      "step": 170
+    },
+    {
+      "epoch": 0.37153720803910917,
+      "grad_norm": 0.04926219582557678,
+      "learning_rate": 1.6916577083458228e-05,
+      "loss": 0.0004,
+      "step": 171
+    },
+    {
+      "epoch": 0.3737099402498642,
+      "grad_norm": 0.039860330522060394,
+      "learning_rate": 1.579060052688548e-05,
+      "loss": 0.0003,
+      "step": 172
+    },
+    {
+      "epoch": 0.37588267246061924,
+      "grad_norm": 0.008798260241746902,
+      "learning_rate": 1.4701315480056164e-05,
+      "loss": 0.0001,
+      "step": 173
+    },
+    {
+      "epoch": 0.3780554046713743,
+      "grad_norm": 0.0032490717712789774,
+      "learning_rate": 1.3649019742625623e-05,
+      "loss": 0.0001,
+      "step": 174
+    },
+    {
+      "epoch": 0.38022813688212925,
+      "grad_norm": 0.005146032199263573,
+      "learning_rate": 1.2634001001741373e-05,
+      "loss": 0.0001,
+      "step": 175
+    },
+    {
+      "epoch": 0.38022813688212925,
+      "eval_loss": 0.004405932500958443,
+      "eval_runtime": 9.9906,
+      "eval_samples_per_second": 5.005,
+      "eval_steps_per_second": 0.701,
+      "step": 175
+    },
+    {
+      "epoch": 0.3824008690928843,
+      "grad_norm": 0.00217464123852551,
+      "learning_rate": 1.1656536753392287e-05,
+      "loss": 0.0001,
+      "step": 176
+    },
+    {
+      "epoch": 0.3845736013036393,
+      "grad_norm": 0.002429766347631812,
+      "learning_rate": 1.0716894226543953e-05,
+      "loss": 0.0001,
+      "step": 177
+    },
+    {
+      "epoch": 0.38674633351439436,
+      "grad_norm": 0.002658179961144924,
+      "learning_rate": 9.815330310080887e-06,
+      "loss": 0.0001,
+      "step": 178
+    },
+    {
+      "epoch": 0.3889190657251494,
+      "grad_norm": 0.0013467564713209867,
+      "learning_rate": 8.952091482575824e-06,
+      "loss": 0.0,
+      "step": 179
+    },
+    {
+      "epoch": 0.3910917979359044,
+      "grad_norm": 0.001209027017466724,
+      "learning_rate": 8.127413744904804e-06,
+      "loss": 0.0,
+      "step": 180
+    },
+    {
+      "epoch": 0.3932645301466594,
+      "grad_norm": 0.002041027182713151,
+      "learning_rate": 7.34152255572697e-06,
+      "loss": 0.0,
+      "step": 181
+    },
+    {
+      "epoch": 0.39543726235741444,
+      "grad_norm": 0.002488530706614256,
+      "learning_rate": 6.594632769846353e-06,
+      "loss": 0.0,
+      "step": 182
+    },
+    {
+      "epoch": 0.3976099945681695,
+      "grad_norm": 0.0028726570308208466,
+      "learning_rate": 5.886948579472778e-06,
+      "loss": 0.0,
+      "step": 183
+    },
+    {
+      "epoch": 0.3997827267789245,
+      "grad_norm": 0.00741666229441762,
+      "learning_rate": 5.218663458397715e-06,
+      "loss": 0.0001,
+      "step": 184
+    },
+    {
+      "epoch": 0.40195545898967955,
+      "grad_norm": 0.011522922664880753,
+      "learning_rate": 4.589960109100444e-06,
+      "loss": 0.0002,
+      "step": 185
+    },
+    {
+      "epoch": 0.40412819120043453,
+      "grad_norm": 0.005647687241435051,
+      "learning_rate": 4.001010412799138e-06,
+      "loss": 0.0001,
+      "step": 186
+    },
+    {
+      "epoch": 0.40630092341118956,
+      "grad_norm": 0.012015015818178654,
+      "learning_rate": 3.451975382460109e-06,
+      "loss": 0.0002,
+      "step": 187
+    },
+    {
+      "epoch": 0.4084736556219446,
+      "grad_norm": 0.011731148697435856,
+      "learning_rate": 2.9430051187785962e-06,
+      "loss": 0.0003,
+      "step": 188
+    },
+    {
+      "epoch": 0.41064638783269963,
+      "grad_norm": 0.015556137077510357,
+      "learning_rate": 2.4742387691426445e-06,
+      "loss": 0.0003,
+      "step": 189
+    },
+    {
+      "epoch": 0.41281912004345467,
+      "grad_norm": 0.012860697694122791,
+      "learning_rate": 2.0458044895916513e-06,
+      "loss": 0.0003,
+      "step": 190
+    },
+    {
+      "epoch": 0.41499185225420965,
+      "grad_norm": 0.010495350696146488,
+      "learning_rate": 1.6578194097797258e-06,
+      "loss": 0.0003,
+      "step": 191
+    },
+    {
+      "epoch": 0.4171645844649647,
+      "grad_norm": 0.005393492057919502,
+      "learning_rate": 1.3103896009537207e-06,
+      "loss": 0.0001,
+      "step": 192
+    },
+    {
+      "epoch": 0.4193373166757197,
+      "grad_norm": 0.005002427380532026,
+      "learning_rate": 1.0036100469542786e-06,
+      "loss": 0.0001,
+      "step": 193
+    },
+    {
+      "epoch": 0.42151004888647475,
+      "grad_norm": 0.004846799187362194,
+      "learning_rate": 7.375646182482875e-07,
+      "loss": 0.0001,
+      "step": 194
+    },
+    {
+      "epoch": 0.4236827810972298,
+      "grad_norm": 0.006467183120548725,
+      "learning_rate": 5.123260489995229e-07,
+      "loss": 0.0001,
+      "step": 195
+    },
+    {
+      "epoch": 0.42585551330798477,
+      "grad_norm": 0.005925624165683985,
+      "learning_rate": 3.2795591718381975e-07,
+      "loss": 0.0001,
+      "step": 196
+    },
+    {
+      "epoch": 0.4280282455187398,
+      "grad_norm": 0.0039407857693731785,
+      "learning_rate": 1.8450462775428942e-07,
+      "loss": 0.0001,
+      "step": 197
+    },
+    {
+      "epoch": 0.43020097772949484,
+      "grad_norm": 0.003776873927563429,
+      "learning_rate": 8.201139886109264e-08,
+      "loss": 0.0001,
+      "step": 198
+    },
+    {
+      "epoch": 0.4323737099402499,
+      "grad_norm": 0.008124479092657566,
+      "learning_rate": 2.0504251129649374e-08,
+      "loss": 0.0002,
+      "step": 199
+    },
+    {
+      "epoch": 0.4345464421510049,
+      "grad_norm": 0.007906035520136356,
+      "learning_rate": 0.0,
+      "loss": 0.0002,
+      "step": 200
+    },
+    {
+      "epoch": 0.4345464421510049,
+      "eval_loss": 0.00530708022415638,
+      "eval_runtime": 9.9913,
+      "eval_samples_per_second": 5.004,
+      "eval_steps_per_second": 0.701,
+      "step": 200
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 3.500353303609344e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null