Training in progress, step 1050

Browse files

Files changed (4) hide show

adapter_config.json +1 -1
adapter_model.safetensors +1 -1
trainer_log.jsonl +11 -0
training_args.bin +2 -2

adapter_config.json CHANGED Viewed

@@ -19,7 +19,7 @@
   "r": 8,
   "rank_pattern": {},
   "revision": null,
-  "target_modules": "^(?!.*patch_embed).*(?:o_proj|fc1|gate_proj|k_proj|down_proj|fc2|qkv|up_proj|v_proj|q_proj|proj).*",
   "task_type": "CAUSAL_LM",
   "use_dora": false,
   "use_rslora": false

   "r": 8,
   "rank_pattern": {},
   "revision": null,
+  "target_modules": "^(?!.*patch_embed).*(?:fc1|proj|down_proj|fc2|qkv|o_proj|q_proj|k_proj|up_proj|gate_proj|v_proj).*",
   "task_type": "CAUSAL_LM",
   "use_dora": false,
   "use_rslora": false

adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:877d476d50606e6083f6c78d34e77669367b6c6b165e3fabd4766d94d783d5e9
 size 29034840

 version https://git-lfs.github.com/spec/v1
+oid sha256:56eaf4c227ce8139a59064befffba82e062b43ce41d4a938db72d66c7114306e
 size 29034840

trainer_log.jsonl CHANGED Viewed

@@ -219,3 +219,14 @@
 {"current_steps": 995, "total_steps": 6770, "loss": 0.3899, "lr": 9.74545148820259e-05, "epoch": 0.29394387001477107, "percentage": 14.7, "elapsed_time": "4:57:35", "remaining_time": "1 day, 4:47:15", "throughput": 578.41, "total_tokens": 10328048}
 {"current_steps": 1000, "total_steps": 6770, "loss": 0.4005, "lr": 9.741590288143944e-05, "epoch": 0.29542097488921715, "percentage": 14.77, "elapsed_time": "4:59:02", "remaining_time": "1 day, 4:45:26", "throughput": 578.48, "total_tokens": 10379136}
 {"current_steps": 1000, "total_steps": 6770, "eval_loss": 0.5501028299331665, "epoch": 0.29542097488921715, "percentage": 14.77, "elapsed_time": "4:59:21", "remaining_time": "1 day, 4:47:16", "throughput": 577.86, "total_tokens": 10379136}

 {"current_steps": 995, "total_steps": 6770, "loss": 0.3899, "lr": 9.74545148820259e-05, "epoch": 0.29394387001477107, "percentage": 14.7, "elapsed_time": "4:57:35", "remaining_time": "1 day, 4:47:15", "throughput": 578.41, "total_tokens": 10328048}
 {"current_steps": 1000, "total_steps": 6770, "loss": 0.4005, "lr": 9.741590288143944e-05, "epoch": 0.29542097488921715, "percentage": 14.77, "elapsed_time": "4:59:02", "remaining_time": "1 day, 4:45:26", "throughput": 578.48, "total_tokens": 10379136}
 {"current_steps": 1000, "total_steps": 6770, "eval_loss": 0.5501028299331665, "epoch": 0.29542097488921715, "percentage": 14.77, "elapsed_time": "4:59:21", "remaining_time": "1 day, 4:47:16", "throughput": 577.86, "total_tokens": 10379136}
+{"current_steps": 1005, "total_steps": 6770, "loss": 0.4585, "lr": 9.737700799805191e-05, "epoch": 0.2968980797636632, "percentage": 14.84, "elapsed_time": "0:02:47", "remaining_time": "0:15:59", "throughput": 62352.42, "total_tokens": 10430680}
+{"current_steps": 1010, "total_steps": 6770, "loss": 0.4257, "lr": 9.73378304639098e-05, "epoch": 0.2983751846381093, "percentage": 14.92, "elapsed_time": "0:04:15", "remaining_time": "0:24:18", "throughput": 40994.74, "total_tokens": 10482472}
+{"current_steps": 1015, "total_steps": 6770, "loss": 0.4359, "lr": 9.729837051274591e-05, "epoch": 0.2998522895125554, "percentage": 14.99, "elapsed_time": "0:05:42", "remaining_time": "0:32:24", "throughput": 30720.21, "total_tokens": 10534392}
+{"current_steps": 1020, "total_steps": 6770, "loss": 0.4158, "lr": 9.725862837997786e-05, "epoch": 0.30132939438700146, "percentage": 15.07, "elapsed_time": "0:07:11", "remaining_time": "0:40:30", "throughput": 24554.28, "total_tokens": 10586104}
+{"current_steps": 1025, "total_steps": 6770, "loss": 0.4067, "lr": 9.721860430270685e-05, "epoch": 0.30280649926144754, "percentage": 15.14, "elapsed_time": "0:08:38", "remaining_time": "0:48:25", "throughput": 20517.46, "total_tokens": 10637560}
+{"current_steps": 1030, "total_steps": 6770, "loss": 0.4811, "lr": 9.717829851971612e-05, "epoch": 0.30428360413589367, "percentage": 15.21, "elapsed_time": "0:10:06", "remaining_time": "0:56:22", "throughput": 17611.31, "total_tokens": 10689552}
+{"current_steps": 1035, "total_steps": 6770, "loss": 0.4732, "lr": 9.713771127146955e-05, "epoch": 0.30576070901033975, "percentage": 15.29, "elapsed_time": "0:11:34", "remaining_time": "1:04:10", "throughput": 15458.65, "total_tokens": 10742208}
+{"current_steps": 1040, "total_steps": 6770, "loss": 0.4735, "lr": 9.70968428001103e-05, "epoch": 0.3072378138847858, "percentage": 15.36, "elapsed_time": "0:13:03", "remaining_time": "1:11:59", "throughput": 13768.31, "total_tokens": 10794008}
+{"current_steps": 1045, "total_steps": 6770, "loss": 0.4381, "lr": 9.705569334945921e-05, "epoch": 0.3087149187592319, "percentage": 15.44, "elapsed_time": "0:14:31", "remaining_time": "1:19:32", "throughput": 12450.04, "total_tokens": 10845736}
+{"current_steps": 1050, "total_steps": 6770, "loss": 0.3991, "lr": 9.701426316501352e-05, "epoch": 0.310192023633678, "percentage": 15.51, "elapsed_time": "0:16:00", "remaining_time": "1:27:10", "throughput": 11349.91, "total_tokens": 10897528}
+{"current_steps": 1050, "total_steps": 6770, "eval_loss": 0.4378110468387604, "epoch": 0.310192023633678, "percentage": 15.51, "elapsed_time": "0:16:47", "remaining_time": "1:31:27", "throughput": 10817.94, "total_tokens": 10897528}

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:db52c1adf656cdb1bd1aa2b486330c6e861e28b82d7249c1b1b421519e2085e4
-size 7416

 version https://git-lfs.github.com/spec/v1
+oid sha256:dfb4b4312b0dec56488d15e8a46a372ea451bb29daae5dc2f31b7c95a1a9b038
+size 7480