Training in progress, step 349600, checkpoint

Browse files

Files changed (7) hide show

last-checkpoint/adapter_config.json +4 -4
last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +234 -3
last-checkpoint/training_args.bin +1 -1

last-checkpoint/adapter_config.json CHANGED Viewed

@@ -23,12 +23,12 @@
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
-    "q_proj",
-    "down_proj",
-    "v_proj",
-    "k_proj",
     "gate_proj",
     "o_proj",
     "up_proj"
   ],
   "task_type": "CAUSAL_LM",

   "rank_pattern": {},
   "revision": null,
   "target_modules": [
     "gate_proj",
+    "v_proj",
     "o_proj",
+    "q_proj",
+    "k_proj",
+    "down_proj",
     "up_proj"
   ],
   "task_type": "CAUSAL_LM",

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:558f0558dbe2ed2fed185bbe33a32e697578eb37a71364f4ae39a77ac585d1c8
 size 1342238560

 version https://git-lfs.github.com/spec/v1
+oid sha256:4cdea3181ff20981cb33a156aeb80b8e43818a8f05858b95c059340e21aa8733
 size 1342238560

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:781887d172314801ab8802842158c08145ef998a6a80b07686139a50d9285ded
 size 683268498

 version https://git-lfs.github.com/spec/v1
+oid sha256:6cd506869b94b5be970fca409bc95398841b5ffd9a592fb2c061dc58227eceba
 size 683268498

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e85f0257d01a91ff4050d39219e8dd384bbb4cfdc5b2e0fb4fabf6b2fe3b33e2
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:3650e87ca836906ee8691b7d5d7b6eea6cead08041d7a7bfe35d5aa4494083ae
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:840aa8e3a2615e43038d3be582aa3892a5d4ec1157dbf18b35d8a9ff2904fee4
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:9d7ddbf1d6d4ba083fc85b63bf4848a26d38ae4d64ba73f9a2514a5c0d3a9d8c
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.23855044785586269,
   "eval_steps": 500,
-  "global_step": 343000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -12012,6 +12012,237 @@
       "learning_rate": 1.9514285936093064e-05,
       "loss": 1.6889,
       "step": 343000
     }
   ],
   "logging_steps": 200,
@@ -12031,7 +12262,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 4.567214300600918e+18,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.24314063140061104,
   "eval_steps": 500,
+  "global_step": 349600,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 1.9514285936093064e-05,
       "loss": 1.6889,
       "step": 343000
+    },
+    {
+      "epoch": 0.23868954432691566,
+      "grad_norm": 3.7890496253967285,
+      "learning_rate": 1.951372599351318e-05,
+      "loss": 1.6764,
+      "step": 343200
+    },
+    {
+      "epoch": 0.23882864079796864,
+      "grad_norm": 4.050221920013428,
+      "learning_rate": 1.9513165739975493e-05,
+      "loss": 1.6499,
+      "step": 343400
+    },
+    {
+      "epoch": 0.23896773726902162,
+      "grad_norm": 7.2723388671875,
+      "learning_rate": 1.951260517550675e-05,
+      "loss": 1.6283,
+      "step": 343600
+    },
+    {
+      "epoch": 0.2391068337400746,
+      "grad_norm": 4.328615665435791,
+      "learning_rate": 1.951204430013371e-05,
+      "loss": 1.6806,
+      "step": 343800
+    },
+    {
+      "epoch": 0.23924593021112758,
+      "grad_norm": 6.319999694824219,
+      "learning_rate": 1.9511483113883144e-05,
+      "loss": 1.6528,
+      "step": 344000
+    },
+    {
+      "epoch": 0.23938502668218056,
+      "grad_norm": 3.773545265197754,
+      "learning_rate": 1.9510921616781844e-05,
+      "loss": 1.643,
+      "step": 344200
+    },
+    {
+      "epoch": 0.23952412315323354,
+      "grad_norm": 4.095102787017822,
+      "learning_rate": 1.9510359808856623e-05,
+      "loss": 1.6922,
+      "step": 344400
+    },
+    {
+      "epoch": 0.23966321962428652,
+      "grad_norm": 5.804976463317871,
+      "learning_rate": 1.950979769013429e-05,
+      "loss": 1.6055,
+      "step": 344600
+    },
+    {
+      "epoch": 0.2398023160953395,
+      "grad_norm": 9.323091506958008,
+      "learning_rate": 1.9509235260641682e-05,
+      "loss": 1.6792,
+      "step": 344800
+    },
+    {
+      "epoch": 0.23994141256639248,
+      "grad_norm": 3.1342084407806396,
+      "learning_rate": 1.950867252040566e-05,
+      "loss": 1.6612,
+      "step": 345000
+    },
+    {
+      "epoch": 0.24008050903744546,
+      "grad_norm": 5.803501605987549,
+      "learning_rate": 1.9508109469453075e-05,
+      "loss": 1.6417,
+      "step": 345200
+    },
+    {
+      "epoch": 0.24021960550849844,
+      "grad_norm": 2.7702744007110596,
+      "learning_rate": 1.9507546107810813e-05,
+      "loss": 1.6397,
+      "step": 345400
+    },
+    {
+      "epoch": 0.24035870197955142,
+      "grad_norm": 6.073428153991699,
+      "learning_rate": 1.9506982435505766e-05,
+      "loss": 1.6321,
+      "step": 345600
+    },
+    {
+      "epoch": 0.2404977984506044,
+      "grad_norm": 4.152920246124268,
+      "learning_rate": 1.9506418452564844e-05,
+      "loss": 1.6407,
+      "step": 345800
+    },
+    {
+      "epoch": 0.24063689492165738,
+      "grad_norm": 3.3686416149139404,
+      "learning_rate": 1.9505854159014972e-05,
+      "loss": 1.7123,
+      "step": 346000
+    },
+    {
+      "epoch": 0.24077599139271036,
+      "grad_norm": 4.510013103485107,
+      "learning_rate": 1.950528955488309e-05,
+      "loss": 1.6391,
+      "step": 346200
+    },
+    {
+      "epoch": 0.24091508786376334,
+      "grad_norm": 4.174516201019287,
+      "learning_rate": 1.9504724640196143e-05,
+      "loss": 1.669,
+      "step": 346400
+    },
+    {
+      "epoch": 0.24105418433481635,
+      "grad_norm": 4.815258979797363,
+      "learning_rate": 1.9504159414981112e-05,
+      "loss": 1.6308,
+      "step": 346600
+    },
+    {
+      "epoch": 0.24119328080586933,
+      "grad_norm": 3.8510098457336426,
+      "learning_rate": 1.950359387926497e-05,
+      "loss": 1.6688,
+      "step": 346800
+    },
+    {
+      "epoch": 0.2413323772769223,
+      "grad_norm": 6.116521835327148,
+      "learning_rate": 1.950302803307472e-05,
+      "loss": 1.6655,
+      "step": 347000
+    },
+    {
+      "epoch": 0.24147147374797528,
+      "grad_norm": 4.095193862915039,
+      "learning_rate": 1.9502461876437376e-05,
+      "loss": 1.6587,
+      "step": 347200
+    },
+    {
+      "epoch": 0.24161057021902826,
+      "grad_norm": 3.3929877281188965,
+      "learning_rate": 1.9501895409379958e-05,
+      "loss": 1.5897,
+      "step": 347400
+    },
+    {
+      "epoch": 0.24174966669008124,
+      "grad_norm": 4.79518461227417,
+      "learning_rate": 1.9501328631929515e-05,
+      "loss": 1.6804,
+      "step": 347600
+    },
+    {
+      "epoch": 0.24188876316113422,
+      "grad_norm": 4.848894119262695,
+      "learning_rate": 1.9500761544113106e-05,
+      "loss": 1.6742,
+      "step": 347800
+    },
+    {
+      "epoch": 0.2420278596321872,
+      "grad_norm": 4.406215667724609,
+      "learning_rate": 1.9500194145957797e-05,
+      "loss": 1.711,
+      "step": 348000
+    },
+    {
+      "epoch": 0.24216695610324018,
+      "grad_norm": 7.045769214630127,
+      "learning_rate": 1.949962643749068e-05,
+      "loss": 1.591,
+      "step": 348200
+    },
+    {
+      "epoch": 0.24230605257429316,
+      "grad_norm": 5.135491847991943,
+      "learning_rate": 1.9499058418738855e-05,
+      "loss": 1.6447,
+      "step": 348400
+    },
+    {
+      "epoch": 0.24244514904534614,
+      "grad_norm": 4.513916492462158,
+      "learning_rate": 1.9498490089729438e-05,
+      "loss": 1.6319,
+      "step": 348600
+    },
+    {
+      "epoch": 0.24258424551639912,
+      "grad_norm": 3.753251791000366,
+      "learning_rate": 1.949792145048956e-05,
+      "loss": 1.6632,
+      "step": 348800
+    },
+    {
+      "epoch": 0.2427233419874521,
+      "grad_norm": 3.935469150543213,
+      "learning_rate": 1.949735250104637e-05,
+      "loss": 1.678,
+      "step": 349000
+    },
+    {
+      "epoch": 0.24286243845850508,
+      "grad_norm": 5.35392951965332,
+      "learning_rate": 1.9496783241427026e-05,
+      "loss": 1.6673,
+      "step": 349200
+    },
+    {
+      "epoch": 0.24300153492955806,
+      "grad_norm": 4.7084879875183105,
+      "learning_rate": 1.9496213671658703e-05,
+      "loss": 1.6702,
+      "step": 349400
+    },
+    {
+      "epoch": 0.24314063140061104,
+      "grad_norm": 4.929116249084473,
+      "learning_rate": 1.94956437917686e-05,
+      "loss": 1.6643,
+      "step": 349600
     }
   ],
   "logging_steps": 200,
       "attributes": {}
     }
   },
+  "total_flos": 4.654650097453105e+18,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null

last-checkpoint/training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2f194f0afbf00cd135f18b6f6e0dc2d489f2d84487accfafc9254221384d4d16
 size 6840

 version https://git-lfs.github.com/spec/v1
+oid sha256:078136780f82e4c02daf15d387ce5ace039d2fead953e9b2034a974f0b6417e9
 size 6840