Training in progress, step 100, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +364 -6

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8fbc2e0e8c532b12d5ae650b860aefcac977a026f98d5a9b9b1cf5c5be586dfd
 size 167832240

 version https://git-lfs.github.com/spec/v1
+oid sha256:3295d5976673808dc24412a5de970a12ab80bc3ec255d16ba914323ffdfbbc05
 size 167832240

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:617b22e00f0a497b70b1e137fbc558a04fde83dfeaa0028a2e95c7eab0f50acc
 size 335922386

 version https://git-lfs.github.com/spec/v1
+oid sha256:9f15ab6e7f6095924d67fdd8c458002ee64af3b93e3b0bdfcdb987b2b11d3cd0
 size 335922386

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5a34422589ed8b4e08d8507d1a22598d0cd3ce93e9659881ad0b41146a6489cc
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:41b31be3551324932df2292a91940e599744f3599d34ee30fe8772305928cbeb
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1900bd8021f13c38b942ed30aea6e2cea1b47664e4ce28d0276b142334732307
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:f2c4a11c3ec7ace2e963dc6e2b0b5b6372cc0250cefb36d5f7289475908638cb
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 2.710475206375122,
-  "best_model_checkpoint": "miner_id_24/checkpoint-50",
-  "epoch": 0.38113387327298714,
   "eval_steps": 50,
-  "global_step": 50,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -373,6 +373,364 @@
       "eval_samples_per_second": 12.918,
       "eval_steps_per_second": 1.622,
       "step": 50
     }
   ],
   "logging_steps": 1,
@@ -396,12 +754,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 2.9750196755339674e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 2.6097841262817383,
+  "best_model_checkpoint": "miner_id_24/checkpoint-100",
+  "epoch": 0.7622677465459743,
   "eval_steps": 50,
+  "global_step": 100,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 12.918,
       "eval_steps_per_second": 1.622,
       "step": 50
+    },
+    {
+      "epoch": 0.3887565507384469,
+      "grad_norm": 1.2314931154251099,
+      "learning_rate": 2.847932752400164e-06,
+      "loss": 2.6059,
+      "step": 51
+    },
+    {
+      "epoch": 0.39637922820390664,
+      "grad_norm": 1.3346525430679321,
+      "learning_rate": 2.761321158169134e-06,
+      "loss": 2.7265,
+      "step": 52
+    },
+    {
+      "epoch": 0.4040019056693664,
+      "grad_norm": 1.3276231288909912,
+      "learning_rate": 2.6743911843603134e-06,
+      "loss": 2.7018,
+      "step": 53
+    },
+    {
+      "epoch": 0.4116245831348261,
+      "grad_norm": 1.3935623168945312,
+      "learning_rate": 2.587248741756253e-06,
+      "loss": 2.7656,
+      "step": 54
+    },
+    {
+      "epoch": 0.41924726060028583,
+      "grad_norm": 1.392889380455017,
+      "learning_rate": 2.5e-06,
+      "loss": 2.7419,
+      "step": 55
+    },
+    {
+      "epoch": 0.4268699380657456,
+      "grad_norm": 1.4677642583847046,
+      "learning_rate": 2.4127512582437486e-06,
+      "loss": 2.8227,
+      "step": 56
+    },
+    {
+      "epoch": 0.43449261553120533,
+      "grad_norm": 1.4074047803878784,
+      "learning_rate": 2.325608815639687e-06,
+      "loss": 2.7652,
+      "step": 57
+    },
+    {
+      "epoch": 0.4421152929966651,
+      "grad_norm": 1.4082292318344116,
+      "learning_rate": 2.238678841830867e-06,
+      "loss": 2.7916,
+      "step": 58
+    },
+    {
+      "epoch": 0.4497379704621248,
+      "grad_norm": 1.5705230236053467,
+      "learning_rate": 2.1520672475998374e-06,
+      "loss": 2.8949,
+      "step": 59
+    },
+    {
+      "epoch": 0.4573606479275846,
+      "grad_norm": 1.4800447225570679,
+      "learning_rate": 2.0658795558326745e-06,
+      "loss": 2.8763,
+      "step": 60
+    },
+    {
+      "epoch": 0.4649833253930443,
+      "grad_norm": 1.7153626680374146,
+      "learning_rate": 1.9802207729556023e-06,
+      "loss": 2.9525,
+      "step": 61
+    },
+    {
+      "epoch": 0.47260600285850407,
+      "grad_norm": 1.6682062149047852,
+      "learning_rate": 1.895195261000831e-06,
+      "loss": 2.9583,
+      "step": 62
+    },
+    {
+      "epoch": 0.4802286803239638,
+      "grad_norm": 2.1147046089172363,
+      "learning_rate": 1.8109066104575023e-06,
+      "loss": 3.2925,
+      "step": 63
+    },
+    {
+      "epoch": 0.4878513577894235,
+      "grad_norm": 2.6401166915893555,
+      "learning_rate": 1.7274575140626318e-06,
+      "loss": 4.3026,
+      "step": 64
+    },
+    {
+      "epoch": 0.49547403525488326,
+      "grad_norm": 0.7430612444877625,
+      "learning_rate": 1.6449496416858285e-06,
+      "loss": 2.0217,
+      "step": 65
+    },
+    {
+      "epoch": 0.5030967127203431,
+      "grad_norm": 0.7868032455444336,
+      "learning_rate": 1.56348351646022e-06,
+      "loss": 2.1813,
+      "step": 66
+    },
+    {
+      "epoch": 0.5107193901858028,
+      "grad_norm": 0.8234112858772278,
+      "learning_rate": 1.4831583923105e-06,
+      "loss": 2.2301,
+      "step": 67
+    },
+    {
+      "epoch": 0.5183420676512626,
+      "grad_norm": 0.8945828676223755,
+      "learning_rate": 1.4040721330273063e-06,
+      "loss": 2.2984,
+      "step": 68
+    },
+    {
+      "epoch": 0.5259647451167222,
+      "grad_norm": 0.8570656776428223,
+      "learning_rate": 1.3263210930352737e-06,
+      "loss": 2.2342,
+      "step": 69
+    },
+    {
+      "epoch": 0.533587422582182,
+      "grad_norm": 0.8565142154693604,
+      "learning_rate": 1.2500000000000007e-06,
+      "loss": 2.3728,
+      "step": 70
+    },
+    {
+      "epoch": 0.5412101000476417,
+      "grad_norm": 0.9122400879859924,
+      "learning_rate": 1.1752018394169882e-06,
+      "loss": 2.3101,
+      "step": 71
+    },
+    {
+      "epoch": 0.5488327775131014,
+      "grad_norm": 0.9057995676994324,
+      "learning_rate": 1.1020177413231334e-06,
+      "loss": 2.3959,
+      "step": 72
+    },
+    {
+      "epoch": 0.5564554549785612,
+      "grad_norm": 0.9446002244949341,
+      "learning_rate": 1.0305368692688175e-06,
+      "loss": 2.3936,
+      "step": 73
+    },
+    {
+      "epoch": 0.5640781324440209,
+      "grad_norm": 0.977627694606781,
+      "learning_rate": 9.608463116858544e-07,
+      "loss": 2.4138,
+      "step": 74
+    },
+    {
+      "epoch": 0.5717008099094807,
+      "grad_norm": 0.9783718585968018,
+      "learning_rate": 8.930309757836517e-07,
+      "loss": 2.4278,
+      "step": 75
+    },
+    {
+      "epoch": 0.5793234873749404,
+      "grad_norm": 0.9705466628074646,
+      "learning_rate": 8.271734841028553e-07,
+      "loss": 2.4498,
+      "step": 76
+    },
+    {
+      "epoch": 0.5869461648404002,
+      "grad_norm": 1.0789556503295898,
+      "learning_rate": 7.633540738525066e-07,
+      "loss": 2.4888,
+      "step": 77
+    },
+    {
+      "epoch": 0.5945688423058599,
+      "grad_norm": 1.047377347946167,
+      "learning_rate": 7.016504991533727e-07,
+      "loss": 2.4992,
+      "step": 78
+    },
+    {
+      "epoch": 0.6021915197713197,
+      "grad_norm": 1.069031000137329,
+      "learning_rate": 6.421379363065142e-07,
+      "loss": 2.5096,
+      "step": 79
+    },
+    {
+      "epoch": 0.6098141972367794,
+      "grad_norm": 1.0925695896148682,
+      "learning_rate": 5.848888922025553e-07,
+      "loss": 2.5255,
+      "step": 80
+    },
+    {
+      "epoch": 0.6174368747022392,
+      "grad_norm": 1.0890165567398071,
+      "learning_rate": 5.299731159831953e-07,
+      "loss": 2.5195,
+      "step": 81
+    },
+    {
+      "epoch": 0.6250595521676989,
+      "grad_norm": 1.142870306968689,
+      "learning_rate": 4.774575140626317e-07,
+      "loss": 2.526,
+      "step": 82
+    },
+    {
+      "epoch": 0.6326822296331587,
+      "grad_norm": 1.1425656080245972,
+      "learning_rate": 4.27406068612396e-07,
+      "loss": 2.5021,
+      "step": 83
+    },
+    {
+      "epoch": 0.6403049070986184,
+      "grad_norm": 1.1594488620758057,
+      "learning_rate": 3.798797596089351e-07,
+      "loss": 2.6243,
+      "step": 84
+    },
+    {
+      "epoch": 0.6479275845640782,
+      "grad_norm": 1.2081568241119385,
+      "learning_rate": 3.3493649053890325e-07,
+      "loss": 2.6227,
+      "step": 85
+    },
+    {
+      "epoch": 0.6555502620295379,
+      "grad_norm": 1.2804358005523682,
+      "learning_rate": 2.9263101785268253e-07,
+      "loss": 2.6482,
+      "step": 86
+    },
+    {
+      "epoch": 0.6631729394949977,
+      "grad_norm": 1.2887688875198364,
+      "learning_rate": 2.53014884252083e-07,
+      "loss": 2.5992,
+      "step": 87
+    },
+    {
+      "epoch": 0.6707956169604573,
+      "grad_norm": 1.2514843940734863,
+      "learning_rate": 2.1613635589349756e-07,
+      "loss": 2.735,
+      "step": 88
+    },
+    {
+      "epoch": 0.6784182944259171,
+      "grad_norm": 1.3292714357376099,
+      "learning_rate": 1.8204036358303173e-07,
+      "loss": 2.6533,
+      "step": 89
+    },
+    {
+      "epoch": 0.6860409718913768,
+      "grad_norm": 1.38014554977417,
+      "learning_rate": 1.507684480352292e-07,
+      "loss": 2.7992,
+      "step": 90
+    },
+    {
+      "epoch": 0.6936636493568366,
+      "grad_norm": 1.4328645467758179,
+      "learning_rate": 1.223587092621162e-07,
+      "loss": 2.764,
+      "step": 91
+    },
+    {
+      "epoch": 0.7012863268222963,
+      "grad_norm": 1.6062235832214355,
+      "learning_rate": 9.684576015420277e-08,
+      "loss": 2.8302,
+      "step": 92
+    },
+    {
+      "epoch": 0.708909004287756,
+      "grad_norm": 1.6477878093719482,
+      "learning_rate": 7.426068431000883e-08,
+      "loss": 2.925,
+      "step": 93
+    },
+    {
+      "epoch": 0.7165316817532158,
+      "grad_norm": 1.810110330581665,
+      "learning_rate": 5.463099816548578e-08,
+      "loss": 2.922,
+      "step": 94
+    },
+    {
+      "epoch": 0.7241543592186755,
+      "grad_norm": 2.1606154441833496,
+      "learning_rate": 3.798061746947995e-08,
+      "loss": 3.3003,
+      "step": 95
+    },
+    {
+      "epoch": 0.7317770366841353,
+      "grad_norm": 2.8912160396575928,
+      "learning_rate": 2.4329828146074096e-08,
+      "loss": 4.0696,
+      "step": 96
+    },
+    {
+      "epoch": 0.739399714149595,
+      "grad_norm": 0.7114972472190857,
+      "learning_rate": 1.3695261579316776e-08,
+      "loss": 2.0816,
+      "step": 97
+    },
+    {
+      "epoch": 0.7470223916150548,
+      "grad_norm": 0.7526923418045044,
+      "learning_rate": 6.089874350439507e-09,
+      "loss": 2.15,
+      "step": 98
+    },
+    {
+      "epoch": 0.7546450690805145,
+      "grad_norm": 0.7823670506477356,
+      "learning_rate": 1.5229324522605949e-09,
+      "loss": 2.1743,
+      "step": 99
+    },
+    {
+      "epoch": 0.7622677465459743,
+      "grad_norm": 0.8256052136421204,
+      "learning_rate": 0.0,
+      "loss": 2.2702,
+      "step": 100
+    },
+    {
+      "epoch": 0.7622677465459743,
+      "eval_loss": 2.6097841262817383,
+      "eval_runtime": 68.4212,
+      "eval_samples_per_second": 12.92,
+      "eval_steps_per_second": 1.622,
+      "step": 100
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 5.946329850225623e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null