Training in progress, step 343000, checkpoint

Browse files

Files changed (7) hide show

last-checkpoint/adapter_config.json +5 -5
last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +234 -3
last-checkpoint/training_args.bin +1 -1

last-checkpoint/adapter_config.json CHANGED Viewed

@@ -23,13 +23,13 @@
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
-    "up_proj",
-    "gate_proj",
-    "k_proj",
-    "v_proj",
     "q_proj",
     "o_proj",
-    "down_proj"
   ],
   "task_type": "CAUSAL_LM",
   "use_dora": false,

   "rank_pattern": {},
   "revision": null,
   "target_modules": [
     "q_proj",
+    "down_proj",
+    "v_proj",
+    "k_proj",
+    "gate_proj",
     "o_proj",
+    "up_proj"
   ],
   "task_type": "CAUSAL_LM",
   "use_dora": false,

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:77ccc7e48ccdbe805a596485ddd3dbadcee2e22d9ba053f7df72c11bef42dd8e
 size 1342238560

 version https://git-lfs.github.com/spec/v1
+oid sha256:558f0558dbe2ed2fed185bbe33a32e697578eb37a71364f4ae39a77ac585d1c8
 size 1342238560

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a674a1bf3923257c0203aec537f3903553312ad604dd2b86b9a2d5cd0ddb714f
 size 683268498

 version https://git-lfs.github.com/spec/v1
+oid sha256:781887d172314801ab8802842158c08145ef998a6a80b07686139a50d9285ded
 size 683268498

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a14ac9e461892314d6ba767ae6fbf3be389395cb4fe125c43f81c17b334c00ce
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:e85f0257d01a91ff4050d39219e8dd384bbb4cfdc5b2e0fb4fabf6b2fe3b33e2
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0bf5cb7d0fbd840eda73ead5e2ccb0627aa96feb1ef96682b5cfe40a387534d6
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:840aa8e3a2615e43038d3be582aa3892a5d4ec1157dbf18b35d8a9ff2904fee4
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.2339602643111143,
   "eval_steps": 500,
-  "global_step": 336400,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -11781,6 +11781,237 @@
       "learning_rate": 1.9532589419723944e-05,
       "loss": 1.7161,
       "step": 336400
     }
   ],
   "logging_steps": 200,
@@ -11800,7 +12031,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 4.4789352142658273e+18,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.23855044785586269,
   "eval_steps": 500,
+  "global_step": 343000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 1.9532589419723944e-05,
       "loss": 1.7161,
       "step": 336400
+    },
+    {
+      "epoch": 0.23409936078216728,
+      "grad_norm": 5.75113582611084,
+      "learning_rate": 1.9532039753658822e-05,
+      "loss": 1.6752,
+      "step": 336600
+    },
+    {
+      "epoch": 0.23423845725322026,
+      "grad_norm": 3.8082878589630127,
+      "learning_rate": 1.9531489775761617e-05,
+      "loss": 1.6679,
+      "step": 336800
+    },
+    {
+      "epoch": 0.23437755372427324,
+      "grad_norm": 4.37647819519043,
+      "learning_rate": 1.953093948605858e-05,
+      "loss": 1.6643,
+      "step": 337000
+    },
+    {
+      "epoch": 0.23451665019532622,
+      "grad_norm": 5.018675327301025,
+      "learning_rate": 1.953038888457599e-05,
+      "loss": 1.6606,
+      "step": 337200
+    },
+    {
+      "epoch": 0.2346557466663792,
+      "grad_norm": 5.047998905181885,
+      "learning_rate": 1.952983797134013e-05,
+      "loss": 1.6508,
+      "step": 337400
+    },
+    {
+      "epoch": 0.23479484313743218,
+      "grad_norm": 7.279408931732178,
+      "learning_rate": 1.95292867463773e-05,
+      "loss": 1.6547,
+      "step": 337600
+    },
+    {
+      "epoch": 0.23493393960848516,
+      "grad_norm": 6.7975382804870605,
+      "learning_rate": 1.9528735209713808e-05,
+      "loss": 1.6461,
+      "step": 337800
+    },
+    {
+      "epoch": 0.23507303607953814,
+      "grad_norm": 7.198062896728516,
+      "learning_rate": 1.9528183361375986e-05,
+      "loss": 1.6954,
+      "step": 338000
+    },
+    {
+      "epoch": 0.23521213255059112,
+      "grad_norm": 4.493501663208008,
+      "learning_rate": 1.9527631201390185e-05,
+      "loss": 1.6956,
+      "step": 338200
+    },
+    {
+      "epoch": 0.2353512290216441,
+      "grad_norm": 4.0898118019104,
+      "learning_rate": 1.952707872978276e-05,
+      "loss": 1.6233,
+      "step": 338400
+    },
+    {
+      "epoch": 0.23549032549269708,
+      "grad_norm": 3.5022025108337402,
+      "learning_rate": 1.952652594658009e-05,
+      "loss": 1.6675,
+      "step": 338600
+    },
+    {
+      "epoch": 0.23562942196375006,
+      "grad_norm": 3.9198243618011475,
+      "learning_rate": 1.9525972851808555e-05,
+      "loss": 1.6433,
+      "step": 338800
+    },
+    {
+      "epoch": 0.23576851843480304,
+      "grad_norm": 4.736083507537842,
+      "learning_rate": 1.9525419445494563e-05,
+      "loss": 1.6486,
+      "step": 339000
+    },
+    {
+      "epoch": 0.23590761490585604,
+      "grad_norm": 3.913604259490967,
+      "learning_rate": 1.952486572766454e-05,
+      "loss": 1.5873,
+      "step": 339200
+    },
+    {
+      "epoch": 0.23604671137690902,
+      "grad_norm": 4.593210220336914,
+      "learning_rate": 1.9524311698344908e-05,
+      "loss": 1.696,
+      "step": 339400
+    },
+    {
+      "epoch": 0.236185807847962,
+      "grad_norm": 12.825864791870117,
+      "learning_rate": 1.9523757357562124e-05,
+      "loss": 1.6756,
+      "step": 339600
+    },
+    {
+      "epoch": 0.23632490431901498,
+      "grad_norm": 3.4124608039855957,
+      "learning_rate": 1.9523202705342653e-05,
+      "loss": 1.6614,
+      "step": 339800
+    },
+    {
+      "epoch": 0.23646400079006796,
+      "grad_norm": 3.605181932449341,
+      "learning_rate": 1.9522647741712966e-05,
+      "loss": 1.6916,
+      "step": 340000
+    },
+    {
+      "epoch": 0.23660309726112094,
+      "grad_norm": 5.278689384460449,
+      "learning_rate": 1.952209246669956e-05,
+      "loss": 1.6617,
+      "step": 340200
+    },
+    {
+      "epoch": 0.23674219373217392,
+      "grad_norm": 5.578737258911133,
+      "learning_rate": 1.9521536880328943e-05,
+      "loss": 1.7077,
+      "step": 340400
+    },
+    {
+      "epoch": 0.2368812902032269,
+      "grad_norm": 4.157208442687988,
+      "learning_rate": 1.9520980982627642e-05,
+      "loss": 1.6824,
+      "step": 340600
+    },
+    {
+      "epoch": 0.23702038667427988,
+      "grad_norm": 3.1329407691955566,
+      "learning_rate": 1.9520424773622193e-05,
+      "loss": 1.6559,
+      "step": 340800
+    },
+    {
+      "epoch": 0.23715948314533286,
+      "grad_norm": 4.475450038909912,
+      "learning_rate": 1.951986825333914e-05,
+      "loss": 1.7017,
+      "step": 341000
+    },
+    {
+      "epoch": 0.23729857961638584,
+      "grad_norm": 4.912330627441406,
+      "learning_rate": 1.9519311421805062e-05,
+      "loss": 1.6263,
+      "step": 341200
+    },
+    {
+      "epoch": 0.23743767608743882,
+      "grad_norm": 6.892397403717041,
+      "learning_rate": 1.951875427904654e-05,
+      "loss": 1.7071,
+      "step": 341400
+    },
+    {
+      "epoch": 0.2375767725584918,
+      "grad_norm": 4.659296989440918,
+      "learning_rate": 1.9518196825090167e-05,
+      "loss": 1.6526,
+      "step": 341600
+    },
+    {
+      "epoch": 0.23771586902954478,
+      "grad_norm": 7.2321977615356445,
+      "learning_rate": 1.9517639059962558e-05,
+      "loss": 1.619,
+      "step": 341800
+    },
+    {
+      "epoch": 0.23785496550059776,
+      "grad_norm": 4.7723283767700195,
+      "learning_rate": 1.951708098369033e-05,
+      "loss": 1.6601,
+      "step": 342000
+    },
+    {
+      "epoch": 0.23799406197165074,
+      "grad_norm": 4.46943473815918,
+      "learning_rate": 1.951652259630014e-05,
+      "loss": 1.6552,
+      "step": 342200
+    },
+    {
+      "epoch": 0.23813315844270372,
+      "grad_norm": 3.9207563400268555,
+      "learning_rate": 1.951596389781864e-05,
+      "loss": 1.6588,
+      "step": 342400
+    },
+    {
+      "epoch": 0.2382722549137567,
+      "grad_norm": 4.317783355712891,
+      "learning_rate": 1.95154048882725e-05,
+      "loss": 1.6362,
+      "step": 342600
+    },
+    {
+      "epoch": 0.2384113513848097,
+      "grad_norm": 4.8455939292907715,
+      "learning_rate": 1.9514845567688408e-05,
+      "loss": 1.6518,
+      "step": 342800
+    },
+    {
+      "epoch": 0.23855044785586269,
+      "grad_norm": 7.664321422576904,
+      "learning_rate": 1.9514285936093064e-05,
+      "loss": 1.6889,
+      "step": 343000
     }
   ],
   "logging_steps": 200,
       "attributes": {}
     }
   },
+  "total_flos": 4.567214300600918e+18,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null

last-checkpoint/training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e6420e27a1743978dbb7f5afac4bb71396b01a4362c274caf9f98fc91a6bd501
 size 6840

 version https://git-lfs.github.com/spec/v1
+oid sha256:2f194f0afbf00cd135f18b6f6e0dc2d489f2d84487accfafc9254221384d4d16
 size 6840