Training in progress, step 343000
- adapter_model.safetensors +1 -1
- last-checkpoint/adapter_config.json +5 -5
- last-checkpoint/adapter_model.safetensors +1 -1
- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/rng_state.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +3 -227
- last-checkpoint/training_args.bin +1 -1
adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:558f0558dbe2ed2fed185bbe33a32e697578eb37a71364f4ae39a77ac585d1c8
 size 1342238560
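Each of the binary artifacts in this commit is stored as a Git LFS pointer: the repository tracks only the spec version line, the sha256 object ID, and the byte size, while the payload itself lives in LFS storage. Below is a minimal sketch for checking that a locally downloaded adapter_model.safetensors matches the updated pointer above; the local path is an assumption, the hash and size are taken from the diff.

```python
import hashlib
from pathlib import Path

# Values taken from the updated LFS pointer in this commit.
EXPECTED_SHA256 = "558f0558dbe2ed2fed185bbe33a32e697578eb37a71364f4ae39a77ac585d1c8"
EXPECTED_SIZE = 1_342_238_560

def verify_lfs_object(path: Path) -> bool:
    """Check that a downloaded file matches the size and sha256 of its LFS pointer."""
    if path.stat().st_size != EXPECTED_SIZE:
        return False
    digest = hashlib.sha256()
    with path.open("rb") as f:
        # Stream in 1 MiB chunks so the 1.3 GB adapter is not loaded into memory at once.
        for chunk in iter(lambda: f.read(1 << 20), b""):
            digest.update(chunk)
    return digest.hexdigest() == EXPECTED_SHA256

# Hypothetical local path; adjust to wherever the adapter was downloaded.
print(verify_lfs_object(Path("adapter_model.safetensors")))
```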
last-checkpoint/adapter_config.json
CHANGED
@@ -23,13 +23,13 @@
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
-    "
-    "down_proj",
-    "v_proj",
-    "k_proj",
+    "up_proj",
     "gate_proj",
+    "k_proj",
+    "v_proj",
+    "q_proj",
     "o_proj",
-    "
+    "down_proj"
   ],
   "task_type": "CAUSAL_LM",
   "use_dora": false,
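The hunk above rewrites the LoRA target_modules list so that all seven Llama-style projection matrices (q_proj, k_proj, v_proj, o_proj, gate_proj, up_proj, down_proj) are adapted. A minimal sketch of an equivalent configuration built with the peft library follows; the rank and alpha values are placeholders, since they are not shown in this hunk.

```python
from peft import LoraConfig

# target_modules mirrors the updated list in last-checkpoint/adapter_config.json;
# r and lora_alpha are assumed values not visible in this diff.
lora_config = LoraConfig(
    r=16,
    lora_alpha=32,
    target_modules=[
        "up_proj",
        "gate_proj",
        "k_proj",
        "v_proj",
        "q_proj",
        "o_proj",
        "down_proj",
    ],
    task_type="CAUSAL_LM",
    use_dora=False,
)
```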
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:77ccc7e48ccdbe805a596485ddd3dbadcee2e22d9ba053f7df72c11bef42dd8e
 size 1342238560
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:a674a1bf3923257c0203aec537f3903553312ad604dd2b86b9a2d5cd0ddb714f
 size 683268498
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:a14ac9e461892314d6ba767ae6fbf3be389395cb4fe125c43f81c17b334c00ce
 size 14244
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:0bf5cb7d0fbd840eda73ead5e2ccb0627aa96feb1ef96682b5cfe40a387534d6
 size 1064
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.
+  "epoch": 0.2339602643111143,
   "eval_steps": 500,
-  "global_step":
+  "global_step": 336400,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -11781,230 +11781,6 @@
       "learning_rate": 1.9532589419723944e-05,
       "loss": 1.7161,
       "step": 336400
-    },
-    {
-      "epoch": 0.23409936078216728,
-      "grad_norm": 5.75113582611084,
-      "learning_rate": 1.9532039753658822e-05,
-      "loss": 1.6752,
-      "step": 336600
-    },
-    {
-      "epoch": 0.23423845725322026,
-      "grad_norm": 3.8082878589630127,
-      "learning_rate": 1.9531489775761617e-05,
-      "loss": 1.6679,
-      "step": 336800
-    },
-    {
-      "epoch": 0.23437755372427324,
-      "grad_norm": 4.37647819519043,
-      "learning_rate": 1.953093948605858e-05,
-      "loss": 1.6643,
-      "step": 337000
-    },
-    {
-      "epoch": 0.23451665019532622,
-      "grad_norm": 5.018675327301025,
-      "learning_rate": 1.953038888457599e-05,
-      "loss": 1.6606,
-      "step": 337200
-    },
-    {
-      "epoch": 0.2346557466663792,
-      "grad_norm": 5.047998905181885,
-      "learning_rate": 1.952983797134013e-05,
-      "loss": 1.6508,
-      "step": 337400
-    },
-    {
-      "epoch": 0.23479484313743218,
-      "grad_norm": 7.279408931732178,
-      "learning_rate": 1.95292867463773e-05,
-      "loss": 1.6547,
-      "step": 337600
-    },
-    {
-      "epoch": 0.23493393960848516,
-      "grad_norm": 6.7975382804870605,
-      "learning_rate": 1.9528735209713808e-05,
-      "loss": 1.6461,
-      "step": 337800
-    },
-    {
-      "epoch": 0.23507303607953814,
-      "grad_norm": 7.198062896728516,
-      "learning_rate": 1.9528183361375986e-05,
-      "loss": 1.6954,
-      "step": 338000
-    },
-    {
-      "epoch": 0.23521213255059112,
-      "grad_norm": 4.493501663208008,
-      "learning_rate": 1.9527631201390185e-05,
-      "loss": 1.6956,
-      "step": 338200
-    },
-    {
-      "epoch": 0.2353512290216441,
-      "grad_norm": 4.0898118019104,
-      "learning_rate": 1.952707872978276e-05,
-      "loss": 1.6233,
-      "step": 338400
-    },
-    {
-      "epoch": 0.23549032549269708,
-      "grad_norm": 3.5022025108337402,
-      "learning_rate": 1.952652594658009e-05,
-      "loss": 1.6675,
-      "step": 338600
-    },
-    {
-      "epoch": 0.23562942196375006,
-      "grad_norm": 3.9198243618011475,
-      "learning_rate": 1.9525972851808555e-05,
-      "loss": 1.6433,
-      "step": 338800
-    },
-    {
-      "epoch": 0.23576851843480304,
-      "grad_norm": 4.736083507537842,
-      "learning_rate": 1.9525419445494563e-05,
-      "loss": 1.6486,
-      "step": 339000
-    },
-    {
-      "epoch": 0.23590761490585604,
-      "grad_norm": 3.913604259490967,
-      "learning_rate": 1.952486572766454e-05,
-      "loss": 1.5873,
-      "step": 339200
-    },
-    {
-      "epoch": 0.23604671137690902,
-      "grad_norm": 4.593210220336914,
-      "learning_rate": 1.9524311698344908e-05,
-      "loss": 1.696,
-      "step": 339400
-    },
-    {
-      "epoch": 0.236185807847962,
-      "grad_norm": 12.825864791870117,
-      "learning_rate": 1.9523757357562124e-05,
-      "loss": 1.6756,
-      "step": 339600
-    },
-    {
-      "epoch": 0.23632490431901498,
-      "grad_norm": 3.4124608039855957,
-      "learning_rate": 1.9523202705342653e-05,
-      "loss": 1.6614,
-      "step": 339800
-    },
-    {
-      "epoch": 0.23646400079006796,
-      "grad_norm": 3.605181932449341,
-      "learning_rate": 1.9522647741712966e-05,
-      "loss": 1.6916,
-      "step": 340000
-    },
-    {
-      "epoch": 0.23660309726112094,
-      "grad_norm": 5.278689384460449,
-      "learning_rate": 1.952209246669956e-05,
-      "loss": 1.6617,
-      "step": 340200
-    },
-    {
-      "epoch": 0.23674219373217392,
-      "grad_norm": 5.578737258911133,
-      "learning_rate": 1.9521536880328943e-05,
-      "loss": 1.7077,
-      "step": 340400
-    },
-    {
-      "epoch": 0.2368812902032269,
-      "grad_norm": 4.157208442687988,
-      "learning_rate": 1.9520980982627642e-05,
-      "loss": 1.6824,
-      "step": 340600
-    },
-    {
-      "epoch": 0.23702038667427988,
-      "grad_norm": 3.1329407691955566,
-      "learning_rate": 1.9520424773622193e-05,
-      "loss": 1.6559,
-      "step": 340800
-    },
-    {
-      "epoch": 0.23715948314533286,
-      "grad_norm": 4.475450038909912,
-      "learning_rate": 1.951986825333914e-05,
-      "loss": 1.7017,
-      "step": 341000
-    },
-    {
-      "epoch": 0.23729857961638584,
-      "grad_norm": 4.912330627441406,
-      "learning_rate": 1.9519311421805062e-05,
-      "loss": 1.6263,
-      "step": 341200
-    },
-    {
-      "epoch": 0.23743767608743882,
-      "grad_norm": 6.892397403717041,
-      "learning_rate": 1.951875427904654e-05,
-      "loss": 1.7071,
-      "step": 341400
-    },
-    {
-      "epoch": 0.2375767725584918,
-      "grad_norm": 4.659296989440918,
-      "learning_rate": 1.9518196825090167e-05,
-      "loss": 1.6526,
-      "step": 341600
-    },
-    {
-      "epoch": 0.23771586902954478,
-      "grad_norm": 7.2321977615356445,
-      "learning_rate": 1.9517639059962558e-05,
-      "loss": 1.619,
-      "step": 341800
-    },
-    {
-      "epoch": 0.23785496550059776,
-      "grad_norm": 4.7723283767700195,
-      "learning_rate": 1.951708098369033e-05,
-      "loss": 1.6601,
-      "step": 342000
-    },
-    {
-      "epoch": 0.23799406197165074,
-      "grad_norm": 4.46943473815918,
-      "learning_rate": 1.951652259630014e-05,
-      "loss": 1.6552,
-      "step": 342200
-    },
-    {
-      "epoch": 0.23813315844270372,
-      "grad_norm": 3.9207563400268555,
-      "learning_rate": 1.951596389781864e-05,
-      "loss": 1.6588,
-      "step": 342400
-    },
-    {
-      "epoch": 0.2382722549137567,
-      "grad_norm": 4.317783355712891,
-      "learning_rate": 1.95154048882725e-05,
-      "loss": 1.6362,
-      "step": 342600
-    },
-    {
-      "epoch": 0.2384113513848097,
-      "grad_norm": 4.8455939292907715,
-      "learning_rate": 1.9514845567688408e-05,
-      "loss": 1.6518,
-      "step": 342800
     }
   ],
   "logging_steps": 200,
@@ -12024,7 +11800,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 4.
+  "total_flos": 4.4789352142658273e+18,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null
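trainer_state.json holds the running log history written by the transformers Trainer: one entry per logging interval (logging_steps = 200 here), each recording epoch, grad_norm, learning_rate, loss and step. A small sketch for inspecting the most recent entry of the checkpoint in this commit; the path is an assumption based on the last-checkpoint/ directory shown above.

```python
import json
from pathlib import Path

# Hypothetical local path; in this repo the file sits under last-checkpoint/.
state = json.loads(Path("last-checkpoint/trainer_state.json").read_text())

print("global step :", state["global_step"])
print("epoch       :", state["epoch"])

# log_history holds one record per logging interval.
last = state["log_history"][-1]
print("last logged loss:", last["loss"], "at step", last["step"])
```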
last-checkpoint/training_args.bin
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:e6420e27a1743978dbb7f5afac4bb71396b01a4362c274caf9f98fc91a6bd501
 size 6840