alicegoesdown committed
Commit • b2b96ad
1 Parent(s): 20c2790
Training in progress, step 66, checkpoint
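Commit b2b96ad only touches the last-checkpoint/ files. To pull exactly this revision rather than the branch head, a sketch along the following lines should work with huggingface_hub; the repo id is not shown on this page, so it is a placeholder, and the revision may need the full 40-character commit hash rather than the short form.

```python
# Sketch only: pin a download to the commit shown above. The repo id is a
# placeholder (not visible on this page); "b2b96ad" may need to be replaced
# with the full 40-character commit hash.
from huggingface_hub import snapshot_download

local_dir = snapshot_download(
    repo_id="alicegoesdown/your-model-repo",   # placeholder repo id
    revision="b2b96ad",                        # commit from this page
    allow_patterns=["last-checkpoint/*"],      # only the files changed here
)
print("checkpoint downloaded to", local_dir)
```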
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:17a581fe4340510e27eafe021c2fe545f48e7ece8d44cb412fb36a4c61793ff2
 size 2436967616
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:677ab0db2ba4be7ff2cd83bfd86350c42a0588ae8dd0ce8ba95f57b5909abeb7
 size 671466706
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:3eba25965dd82562680a1b10ab4b72c93429b7d66978c078631695befb33f6ab
 size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:3590c95beccaed6e985c8f99327ccb7e1e0604a90f734f2477dfbcb408474e61
 size 1064
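Each of the four files above is stored via Git LFS, so the diff only changes the small pointer file: a spec version line, the SHA-256 of the real payload (the oid), and its size in bytes. A minimal sketch for checking a locally downloaded copy against the pointer recorded in this commit, assuming the file sits at last-checkpoint/scheduler.pt and using the oid and size from the scheduler.pt pointer above:

```python
# Sketch only: verify a local file against the Git LFS pointer values above.
import hashlib
from pathlib import Path

def file_sha256(path: Path, chunk_size: int = 1 << 20) -> str:
    """Stream the file and return its hex SHA-256 (the 'oid' in an LFS pointer)."""
    digest = hashlib.sha256()
    with path.open("rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()

path = Path("last-checkpoint/scheduler.pt")  # assumed local copy of the checkpoint file
expected_oid = "3590c95beccaed6e985c8f99327ccb7e1e0604a90f734f2477dfbcb408474e61"
expected_size = 1064

assert path.stat().st_size == expected_size, "size does not match the LFS pointer"
assert file_sha256(path) == expected_oid, "sha256 does not match the LFS pointer"
print("scheduler.pt matches its LFS pointer")
```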
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": 4.000638484954834,
   "best_model_checkpoint": "miner_id_24/checkpoint-50",
-  "epoch": 0.
+  "epoch": 0.02107532032091965,
   "eval_steps": 25,
-  "global_step":
+  "global_step": 66,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -381,6 +381,118 @@
       "eval_samples_per_second": 3.424,
       "eval_steps_per_second": 3.424,
       "step": 50
+    },
+    {
+      "epoch": 0.01628547479343791,
+      "grad_norm": 18.63812255859375,
+      "learning_rate": 2.165719935902685e-05,
+      "loss": 3.9371,
+      "step": 51
+    },
+    {
+      "epoch": 0.01660479782860336,
+      "grad_norm": 15.783567428588867,
+      "learning_rate": 2.0214529598676836e-05,
+      "loss": 3.4496,
+      "step": 52
+    },
+    {
+      "epoch": 0.01692412086376881,
+      "grad_norm": 16.15505599975586,
+      "learning_rate": 1.8855661083370986e-05,
+      "loss": 3.1263,
+      "step": 53
+    },
+    {
+      "epoch": 0.01724344389893426,
+      "grad_norm": 12.968737602233887,
+      "learning_rate": 1.758386744638546e-05,
+      "loss": 3.0368,
+      "step": 54
+    },
+    {
+      "epoch": 0.017562766934099708,
+      "grad_norm": 9.380385398864746,
+      "learning_rate": 1.6402212549987762e-05,
+      "loss": 2.9326,
+      "step": 55
+    },
+    {
+      "epoch": 0.017882089969265158,
+      "grad_norm": 7.317039489746094,
+      "learning_rate": 1.531354310432403e-05,
+      "loss": 2.5816,
+      "step": 56
+    },
+    {
+      "epoch": 0.01820141300443061,
+      "grad_norm": 4.853557109832764,
+      "learning_rate": 1.4320481809445051e-05,
+      "loss": 2.6577,
+      "step": 57
+    },
+    {
+      "epoch": 0.018520736039596055,
+      "grad_norm": 4.9439239501953125,
+      "learning_rate": 1.3425421036992098e-05,
+      "loss": 3.1326,
+      "step": 58
+    },
+    {
+      "epoch": 0.018840059074761505,
+      "grad_norm": 4.261263847351074,
+      "learning_rate": 1.2630517066764069e-05,
+      "loss": 2.8784,
+      "step": 59
+    },
+    {
+      "epoch": 0.019159382109926956,
+      "grad_norm": 3.9718527793884277,
+      "learning_rate": 1.1937684892050604e-05,
+      "loss": 3.032,
+      "step": 60
+    },
+    {
+      "epoch": 0.019478705145092406,
+      "grad_norm": 3.4298315048217773,
+      "learning_rate": 1.1348593606245522e-05,
+      "loss": 2.6723,
+      "step": 61
+    },
+    {
+      "epoch": 0.019798028180257853,
+      "grad_norm": 3.560950994491577,
+      "learning_rate": 1.0864662381854632e-05,
+      "loss": 2.7488,
+      "step": 62
+    },
+    {
+      "epoch": 0.020117351215423303,
+      "grad_norm": 3.4607114791870117,
+      "learning_rate": 1.0487057051584856e-05,
+      "loss": 2.5312,
+      "step": 63
+    },
+    {
+      "epoch": 0.020436674250588753,
+      "grad_norm": 3.4417977333068848,
+      "learning_rate": 1.0216687299751144e-05,
+      "loss": 3.2348,
+      "step": 64
+    },
+    {
+      "epoch": 0.0207559972857542,
+      "grad_norm": 3.5609071254730225,
+      "learning_rate": 1.0054204470767243e-05,
+      "loss": 3.0496,
+      "step": 65
+    },
+    {
+      "epoch": 0.02107532032091965,
+      "grad_norm": 5.164323806762695,
+      "learning_rate": 1e-05,
+      "loss": 2.4215,
+      "step": 66
     }
   ],
   "logging_steps": 1,
@@ -404,12 +516,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop":
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
-  "total_flos": 1.
+  "total_flos": 1.969468656718971e+17,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null
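The updated trainer_state.json records 16 new log entries (steps 51-66), the learning rate reaching its floor of 1e-05, and should_training_stop set to true, which suggests this is the final checkpoint of the run. A small sketch for reading those fields back from a local copy; the path is assumed, and the name of the callback entry under stateful_callbacks is not visible in this diff, so it is looked up generically:

```python
# Sketch only: inspect the trainer_state.json from this checkpoint locally.
import json
from pathlib import Path

state = json.loads(Path("last-checkpoint/trainer_state.json").read_text())

print("global_step:", state["global_step"])   # 66 in this commit
print("best_metric:", state["best_metric"])   # eval loss recorded at checkpoint-50

# The entry carrying should_training_stop sits under stateful_callbacks; its key
# name is not shown in the diff, so scan for it instead of hard-coding it.
for name, callback in state.get("stateful_callbacks", {}).items():
    args = callback.get("args", {})
    if "should_training_stop" in args:
        print(f"{name}: should_training_stop =", args["should_training_stop"])

# Training-loss entries added in this commit (steps 51-66).
for entry in state["log_history"]:
    if "loss" in entry and entry.get("step", 0) >= 51:
        print(f"step {entry['step']:>3}  loss {entry['loss']:.4f}  lr {entry['learning_rate']:.3e}")
```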