MohamedAhmedAE committed (verified)
Commit c0e3782 · 1 parent: cd8ca16

Training in progress, step 349600
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a48e214829f3637e9f723c3d00cbe7d0201697caff8aaa56d68c10a31b8bf3be
+oid sha256:4cdea3181ff20981cb33a156aeb80b8e43818a8f05858b95c059340e21aa8733
 size 1342238560
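For context: each .safetensors / .pt / .pth / .bin entry in this diff is a Git LFS pointer file (version, oid, size), not the binary itself, so only the sha256 oid changes when the tracked file is replaced. A minimal sketch of checking a pointer against a locally pulled copy, assuming the file has been fetched with `git lfs pull`:

import hashlib
from pathlib import Path

def lfs_sha256(path: str, chunk_size: int = 1 << 20) -> str:
    """Compute the sha256 digest that Git LFS stores as the pointer's oid."""
    h = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            h.update(chunk)
    return h.hexdigest()

local_file = Path("adapter_model.safetensors")  # assumes a local checkout at this commit
if local_file.exists():
    digest = lfs_sha256(str(local_file))
    # Should equal the "+" oid above if the checkout matches this commit.
    print(digest == "4cdea3181ff20981cb33a156aeb80b8e43818a8f05858b95c059340e21aa8733")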
last-checkpoint/adapter_config.json CHANGED
@@ -23,12 +23,12 @@
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
-    "gate_proj",
-    "v_proj",
-    "o_proj",
     "q_proj",
-    "k_proj",
     "down_proj",
+    "v_proj",
+    "k_proj",
+    "gate_proj",
+    "o_proj",
     "up_proj"
   ],
   "task_type": "CAUSAL_LM",
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a48e214829f3637e9f723c3d00cbe7d0201697caff8aaa56d68c10a31b8bf3be
+oid sha256:558f0558dbe2ed2fed185bbe33a32e697578eb37a71364f4ae39a77ac585d1c8
 size 1342238560
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:eadd45627c8ca1194f6799e6a9101451f15e5a8d56693bcdd7dcb2483df02796
+oid sha256:781887d172314801ab8802842158c08145ef998a6a80b07686139a50d9285ded
 size 683268498
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:fb1195b42eac89ca32807fbd177cb327563eb691518ecba53b3eff5532117160
+oid sha256:e85f0257d01a91ff4050d39219e8dd384bbb4cfdc5b2e0fb4fabf6b2fe3b33e2
 size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:13dcd1e5d0637b6d75833d77a011a083cb0e718f6b0a9d87d999b175835a71d2
+oid sha256:840aa8e3a2615e43038d3be582aa3892a5d4ec1157dbf18b35d8a9ff2904fee4
 size 1064
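optimizer.pt, scheduler.pt and rng_state.pth, together with trainer_state.json below, are what allow the Hugging Face Trainer to resume this run exactly (optimizer moments, LR-schedule position and RNG state) via trainer.train(resume_from_checkpoint="last-checkpoint"). A minimal inspection sketch, assuming the files have been pulled locally; the exact keys inside rng_state.pth are an assumption based on what Trainer typically saves:

import torch

# Hypothetical local paths inside a clone with `git lfs pull` already run.
rng_state = torch.load("last-checkpoint/rng_state.pth", weights_only=False)
scheduler_state = torch.load("last-checkpoint/scheduler.pt", weights_only=False)

print(sorted(rng_state.keys()))  # typically python / numpy / cpu (and cuda) RNG states
print(scheduler_state)           # the LR scheduler's state_dict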
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.24300153492955806,
+  "epoch": 0.23855044785586269,
   "eval_steps": 500,
-  "global_step": 349400,
+  "global_step": 343000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -12012,230 +12012,6 @@
       "learning_rate": 1.9514285936093064e-05,
       "loss": 1.6889,
       "step": 343000
-    },
-    {
-      "epoch": 0.23868954432691566,
-      "grad_norm": 3.7890496253967285,
-      "learning_rate": 1.951372599351318e-05,
-      "loss": 1.6764,
-      "step": 343200
-    },
-    {
-      "epoch": 0.23882864079796864,
-      "grad_norm": 4.050221920013428,
-      "learning_rate": 1.9513165739975493e-05,
-      "loss": 1.6499,
-      "step": 343400
-    },
-    {
-      "epoch": 0.23896773726902162,
-      "grad_norm": 7.2723388671875,
-      "learning_rate": 1.951260517550675e-05,
-      "loss": 1.6283,
-      "step": 343600
-    },
-    {
-      "epoch": 0.2391068337400746,
-      "grad_norm": 4.328615665435791,
-      "learning_rate": 1.951204430013371e-05,
-      "loss": 1.6806,
-      "step": 343800
-    },
-    {
-      "epoch": 0.23924593021112758,
-      "grad_norm": 6.319999694824219,
-      "learning_rate": 1.9511483113883144e-05,
-      "loss": 1.6528,
-      "step": 344000
-    },
-    {
-      "epoch": 0.23938502668218056,
-      "grad_norm": 3.773545265197754,
-      "learning_rate": 1.9510921616781844e-05,
-      "loss": 1.643,
-      "step": 344200
-    },
-    {
-      "epoch": 0.23952412315323354,
-      "grad_norm": 4.095102787017822,
-      "learning_rate": 1.9510359808856623e-05,
-      "loss": 1.6922,
-      "step": 344400
-    },
-    {
-      "epoch": 0.23966321962428652,
-      "grad_norm": 5.804976463317871,
-      "learning_rate": 1.950979769013429e-05,
-      "loss": 1.6055,
-      "step": 344600
-    },
-    {
-      "epoch": 0.2398023160953395,
-      "grad_norm": 9.323091506958008,
-      "learning_rate": 1.9509235260641682e-05,
-      "loss": 1.6792,
-      "step": 344800
-    },
-    {
-      "epoch": 0.23994141256639248,
-      "grad_norm": 3.1342084407806396,
-      "learning_rate": 1.950867252040566e-05,
-      "loss": 1.6612,
-      "step": 345000
-    },
-    {
-      "epoch": 0.24008050903744546,
-      "grad_norm": 5.803501605987549,
-      "learning_rate": 1.9508109469453075e-05,
-      "loss": 1.6417,
-      "step": 345200
-    },
-    {
-      "epoch": 0.24021960550849844,
-      "grad_norm": 2.7702744007110596,
-      "learning_rate": 1.9507546107810813e-05,
-      "loss": 1.6397,
-      "step": 345400
-    },
-    {
-      "epoch": 0.24035870197955142,
-      "grad_norm": 6.073428153991699,
-      "learning_rate": 1.9506982435505766e-05,
-      "loss": 1.6321,
-      "step": 345600
-    },
-    {
-      "epoch": 0.2404977984506044,
-      "grad_norm": 4.152920246124268,
-      "learning_rate": 1.9506418452564844e-05,
-      "loss": 1.6407,
-      "step": 345800
-    },
-    {
-      "epoch": 0.24063689492165738,
-      "grad_norm": 3.3686416149139404,
-      "learning_rate": 1.9505854159014972e-05,
-      "loss": 1.7123,
-      "step": 346000
-    },
-    {
-      "epoch": 0.24077599139271036,
-      "grad_norm": 4.510013103485107,
-      "learning_rate": 1.950528955488309e-05,
-      "loss": 1.6391,
-      "step": 346200
-    },
-    {
-      "epoch": 0.24091508786376334,
-      "grad_norm": 4.174516201019287,
-      "learning_rate": 1.9504724640196143e-05,
-      "loss": 1.669,
-      "step": 346400
-    },
-    {
-      "epoch": 0.24105418433481635,
-      "grad_norm": 4.815258979797363,
-      "learning_rate": 1.9504159414981112e-05,
-      "loss": 1.6308,
-      "step": 346600
-    },
-    {
-      "epoch": 0.24119328080586933,
-      "grad_norm": 3.8510098457336426,
-      "learning_rate": 1.950359387926497e-05,
-      "loss": 1.6688,
-      "step": 346800
-    },
-    {
-      "epoch": 0.2413323772769223,
-      "grad_norm": 6.116521835327148,
-      "learning_rate": 1.950302803307472e-05,
-      "loss": 1.6655,
-      "step": 347000
-    },
-    {
-      "epoch": 0.24147147374797528,
-      "grad_norm": 4.095193862915039,
-      "learning_rate": 1.9502461876437376e-05,
-      "loss": 1.6587,
-      "step": 347200
-    },
-    {
-      "epoch": 0.24161057021902826,
-      "grad_norm": 3.3929877281188965,
-      "learning_rate": 1.9501895409379958e-05,
-      "loss": 1.5897,
-      "step": 347400
-    },
-    {
-      "epoch": 0.24174966669008124,
-      "grad_norm": 4.79518461227417,
-      "learning_rate": 1.9501328631929515e-05,
-      "loss": 1.6804,
-      "step": 347600
-    },
-    {
-      "epoch": 0.24188876316113422,
-      "grad_norm": 4.848894119262695,
-      "learning_rate": 1.9500761544113106e-05,
-      "loss": 1.6742,
-      "step": 347800
-    },
-    {
-      "epoch": 0.2420278596321872,
-      "grad_norm": 4.406215667724609,
-      "learning_rate": 1.9500194145957797e-05,
-      "loss": 1.711,
-      "step": 348000
-    },
-    {
-      "epoch": 0.24216695610324018,
-      "grad_norm": 7.045769214630127,
-      "learning_rate": 1.949962643749068e-05,
-      "loss": 1.591,
-      "step": 348200
-    },
-    {
-      "epoch": 0.24230605257429316,
-      "grad_norm": 5.135491847991943,
-      "learning_rate": 1.9499058418738855e-05,
-      "loss": 1.6447,
-      "step": 348400
-    },
-    {
-      "epoch": 0.24244514904534614,
-      "grad_norm": 4.513916492462158,
-      "learning_rate": 1.9498490089729438e-05,
-      "loss": 1.6319,
-      "step": 348600
-    },
-    {
-      "epoch": 0.24258424551639912,
-      "grad_norm": 3.753251791000366,
-      "learning_rate": 1.949792145048956e-05,
-      "loss": 1.6632,
-      "step": 348800
-    },
-    {
-      "epoch": 0.2427233419874521,
-      "grad_norm": 3.935469150543213,
-      "learning_rate": 1.949735250104637e-05,
-      "loss": 1.678,
-      "step": 349000
-    },
-    {
-      "epoch": 0.24286243845850508,
-      "grad_norm": 5.35392951965332,
-      "learning_rate": 1.9496783241427026e-05,
-      "loss": 1.6673,
-      "step": 349200
-    },
-    {
-      "epoch": 0.24300153492955806,
-      "grad_norm": 4.7084879875183105,
-      "learning_rate": 1.9496213671658703e-05,
-      "loss": 1.6702,
-      "step": 349400
     }
   ],
   "logging_steps": 200,
@@ -12255,7 +12031,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 4.652032117813862e+18,
+  "total_flos": 4.567214300600918e+18,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null
last-checkpoint/training_args.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:078136780f82e4c02daf15d387ce5ace039d2fead953e9b2034a974f0b6417e9
+oid sha256:2f194f0afbf00cd135f18b6f6e0dc2d489f2d84487accfafc9254221384d4d16
 size 6840
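training_args.bin is the pickled TrainingArguments object for the run; its oid changes here while the size stays 6840 bytes, so something in the serialized arguments differs between the two checkpoints. A minimal inspection sketch, assuming a transformers install compatible with the one that wrote the file (recent torch versions default to weights_only=True, which must be disabled for pickled objects):

import torch

# Hypothetical local path inside a checkout with `git lfs pull` already run.
args = torch.load("last-checkpoint/training_args.bin", weights_only=False)
print(args.per_device_train_batch_size, args.learning_rate, args.logging_steps)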