MohamedAhmedAE
committed on
Training in progress, step 87200, checkpoint
last-checkpoint/adapter_config.json
CHANGED
@@ -20,13 +20,13 @@
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
-    "down_proj",
-    "q_proj",
     "up_proj",
-    "
-    "
+    "q_proj",
+    "o_proj",
+    "down_proj",
     "k_proj",
-    "
+    "gate_proj",
+    "v_proj"
   ],
   "task_type": "CAUSAL_LM",
   "use_dora": false,
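For reference, the updated target_modules list covers the projection layers typically targeted in Llama-style decoder blocks (q, k, v, o, gate, up, down). A minimal sketch of how an adapter_config.json like this is produced with PEFT follows; the rank, alpha, and base model name are assumptions, since only target_modules, task_type, and use_dora appear in this diff.

from peft import LoraConfig, get_peft_model
from transformers import AutoModelForCausalLM

# Assumed base model; the commit does not name it.
base = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-3.1-8B")

lora_cfg = LoraConfig(
    r=16,                      # assumed rank, not shown in the diff
    lora_alpha=32,             # assumed alpha, not shown in the diff
    target_modules=[           # matches the new side of the diff above
        "up_proj", "q_proj", "o_proj", "down_proj",
        "k_proj", "gate_proj", "v_proj",
    ],
    task_type="CAUSAL_LM",
    use_dora=False,
)

model = get_peft_model(base, lora_cfg)
# Writes adapter_config.json and adapter_model.safetensors into the checkpoint dir.
model.save_pretrained("last-checkpoint")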
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:e2266f5450f07ca58be26969588d2309a083856c6f1fbfcfef2944823461d4b8
 size 2684416208
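The checkpoint binaries in this commit are stored through Git LFS, so the diff only shows the pointer file: the spec version, the sha256 oid of the blob, and its size in bytes. A minimal sketch of checking a locally pulled file against the pointer, assuming the repository has been cloned with the LFS objects fetched:

import hashlib

def sha256_of(path: str, chunk_size: int = 1 << 20) -> str:
    # The LFS oid is the sha256 digest of the full file contents.
    digest = hashlib.sha256()
    with open(path, "rb") as f:
        while block := f.read(chunk_size):
            digest.update(block)
    return digest.hexdigest()

expected = "e2266f5450f07ca58be26969588d2309a083856c6f1fbfcfef2944823461d4b8"
actual = sha256_of("last-checkpoint/adapter_model.safetensors")
assert actual == expected, "adapter weights do not match the pointer in this commit"

The same check applies to optimizer.pt, rng_state.pth, and scheduler.pt below, using their respective oids.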
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:fbb1a1ecaf99901331980a6092485606f5fe6b39f40f14c4d977abd6611b621b
 size 1364844242
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:150cb34b7b1a58c082469350f25504af368f8c5c46b3fccd8d280708ff720ac8
 size 14244
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:2e6917f22460b17aaa8e706e55d947d0135dd26e75e383913c8231aebf75deef
 size 1064
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.
+  "epoch": 0.12129203840172702,
   "eval_steps": 200,
-  "global_step":
+  "global_step": 87200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -2380,6 +2380,69 @@
       "learning_rate": 1.9311780891052998e-05,
       "loss": 1.6567,
       "step": 85400
+    },
+    {
+      "epoch": 0.1190664964127045,
+      "grad_norm": 0.23755620419979095,
+      "learning_rate": 1.930859113373952e-05,
+      "loss": 1.7054,
+      "step": 85600
+    },
+    {
+      "epoch": 0.11934468916133233,
+      "grad_norm": 0.29518914222717285,
+      "learning_rate": 1.9305394266234104e-05,
+      "loss": 1.6406,
+      "step": 85800
+    },
+    {
+      "epoch": 0.11962288190996014,
+      "grad_norm": 0.5197004675865173,
+      "learning_rate": 1.9302190290978622e-05,
+      "loss": 1.6807,
+      "step": 86000
+    },
+    {
+      "epoch": 0.11990107465858794,
+      "grad_norm": 0.2740679979324341,
+      "learning_rate": 1.929897921042036e-05,
+      "loss": 1.6977,
+      "step": 86200
+    },
+    {
+      "epoch": 0.12017926740721577,
+      "grad_norm": 0.33021771907806396,
+      "learning_rate": 1.9295761027012046e-05,
+      "loss": 1.6943,
+      "step": 86400
+    },
+    {
+      "epoch": 0.12045746015584358,
+      "grad_norm": 0.32778891921043396,
+      "learning_rate": 1.929253574321183e-05,
+      "loss": 1.6941,
+      "step": 86600
+    },
+    {
+      "epoch": 0.12073565290447139,
+      "grad_norm": 0.3531610369682312,
+      "learning_rate": 1.9289303361483284e-05,
+      "loss": 1.7031,
+      "step": 86800
+    },
+    {
+      "epoch": 0.12101384565309921,
+      "grad_norm": 0.4716193377971649,
+      "learning_rate": 1.9286063884295397e-05,
+      "loss": 1.668,
+      "step": 87000
+    },
+    {
+      "epoch": 0.12129203840172702,
+      "grad_norm": 0.35484832525253296,
+      "learning_rate": 1.928281731412259e-05,
+      "loss": 1.7128,
+      "step": 87200
     }
   ],
   "logging_steps": 200,
@@ -2399,7 +2462,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 1.
+  "total_flos": 1.7288134689589985e+18,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null
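trainer_state.json is what ties the checkpoint together: it records the running log history plus the global step and epoch that transformers' Trainer reads when resuming from this directory, alongside the optimizer.pt, scheduler.pt, and rng_state.pth files above. A minimal sketch of inspecting it after cloning the checkpoint locally; the path is an assumption:

import json

with open("last-checkpoint/trainer_state.json") as f:
    state = json.load(f)

print(state["global_step"])        # 87200
print(state["epoch"])              # 0.12129203840172702
latest = state["log_history"][-1]  # the step-87200 entry added in this commit
print(latest["loss"], latest["learning_rate"])  # 1.7128, 1.928281731412259e-05

Passing the same directory to Trainer.train(resume_from_checkpoint="last-checkpoint") continues training from step 87200 instead of restarting from scratch.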