Training in progress, step 1050
Browse files- adapter_config.json +1 -1
- adapter_model.safetensors +1 -1
- trainer_log.jsonl +11 -0
- training_args.bin +2 -2
adapter_config.json
CHANGED
@@ -19,7 +19,7 @@
|
|
19 |
"r": 8,
|
20 |
"rank_pattern": {},
|
21 |
"revision": null,
|
22 |
-
"target_modules": "^(?!.*patch_embed).*(?:
|
23 |
"task_type": "CAUSAL_LM",
|
24 |
"use_dora": false,
|
25 |
"use_rslora": false
|
|
|
19 |
"r": 8,
|
20 |
"rank_pattern": {},
|
21 |
"revision": null,
|
22 |
+
"target_modules": "^(?!.*patch_embed).*(?:fc1|proj|down_proj|fc2|qkv|o_proj|q_proj|k_proj|up_proj|gate_proj|v_proj).*",
|
23 |
"task_type": "CAUSAL_LM",
|
24 |
"use_dora": false,
|
25 |
"use_rslora": false
|
adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 29034840
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:56eaf4c227ce8139a59064befffba82e062b43ce41d4a938db72d66c7114306e
|
3 |
size 29034840
|
trainer_log.jsonl
CHANGED
@@ -219,3 +219,14 @@
|
|
219 |
{"current_steps": 995, "total_steps": 6770, "loss": 0.3899, "lr": 9.74545148820259e-05, "epoch": 0.29394387001477107, "percentage": 14.7, "elapsed_time": "4:57:35", "remaining_time": "1 day, 4:47:15", "throughput": 578.41, "total_tokens": 10328048}
|
220 |
{"current_steps": 1000, "total_steps": 6770, "loss": 0.4005, "lr": 9.741590288143944e-05, "epoch": 0.29542097488921715, "percentage": 14.77, "elapsed_time": "4:59:02", "remaining_time": "1 day, 4:45:26", "throughput": 578.48, "total_tokens": 10379136}
|
221 |
{"current_steps": 1000, "total_steps": 6770, "eval_loss": 0.5501028299331665, "epoch": 0.29542097488921715, "percentage": 14.77, "elapsed_time": "4:59:21", "remaining_time": "1 day, 4:47:16", "throughput": 577.86, "total_tokens": 10379136}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
219 |
{"current_steps": 995, "total_steps": 6770, "loss": 0.3899, "lr": 9.74545148820259e-05, "epoch": 0.29394387001477107, "percentage": 14.7, "elapsed_time": "4:57:35", "remaining_time": "1 day, 4:47:15", "throughput": 578.41, "total_tokens": 10328048}
|
220 |
{"current_steps": 1000, "total_steps": 6770, "loss": 0.4005, "lr": 9.741590288143944e-05, "epoch": 0.29542097488921715, "percentage": 14.77, "elapsed_time": "4:59:02", "remaining_time": "1 day, 4:45:26", "throughput": 578.48, "total_tokens": 10379136}
|
221 |
{"current_steps": 1000, "total_steps": 6770, "eval_loss": 0.5501028299331665, "epoch": 0.29542097488921715, "percentage": 14.77, "elapsed_time": "4:59:21", "remaining_time": "1 day, 4:47:16", "throughput": 577.86, "total_tokens": 10379136}
|
222 |
+
{"current_steps": 1005, "total_steps": 6770, "loss": 0.4585, "lr": 9.737700799805191e-05, "epoch": 0.2968980797636632, "percentage": 14.84, "elapsed_time": "0:02:47", "remaining_time": "0:15:59", "throughput": 62352.42, "total_tokens": 10430680}
|
223 |
+
{"current_steps": 1010, "total_steps": 6770, "loss": 0.4257, "lr": 9.73378304639098e-05, "epoch": 0.2983751846381093, "percentage": 14.92, "elapsed_time": "0:04:15", "remaining_time": "0:24:18", "throughput": 40994.74, "total_tokens": 10482472}
|
224 |
+
{"current_steps": 1015, "total_steps": 6770, "loss": 0.4359, "lr": 9.729837051274591e-05, "epoch": 0.2998522895125554, "percentage": 14.99, "elapsed_time": "0:05:42", "remaining_time": "0:32:24", "throughput": 30720.21, "total_tokens": 10534392}
|
225 |
+
{"current_steps": 1020, "total_steps": 6770, "loss": 0.4158, "lr": 9.725862837997786e-05, "epoch": 0.30132939438700146, "percentage": 15.07, "elapsed_time": "0:07:11", "remaining_time": "0:40:30", "throughput": 24554.28, "total_tokens": 10586104}
|
226 |
+
{"current_steps": 1025, "total_steps": 6770, "loss": 0.4067, "lr": 9.721860430270685e-05, "epoch": 0.30280649926144754, "percentage": 15.14, "elapsed_time": "0:08:38", "remaining_time": "0:48:25", "throughput": 20517.46, "total_tokens": 10637560}
|
227 |
+
{"current_steps": 1030, "total_steps": 6770, "loss": 0.4811, "lr": 9.717829851971612e-05, "epoch": 0.30428360413589367, "percentage": 15.21, "elapsed_time": "0:10:06", "remaining_time": "0:56:22", "throughput": 17611.31, "total_tokens": 10689552}
|
228 |
+
{"current_steps": 1035, "total_steps": 6770, "loss": 0.4732, "lr": 9.713771127146955e-05, "epoch": 0.30576070901033975, "percentage": 15.29, "elapsed_time": "0:11:34", "remaining_time": "1:04:10", "throughput": 15458.65, "total_tokens": 10742208}
|
229 |
+
{"current_steps": 1040, "total_steps": 6770, "loss": 0.4735, "lr": 9.70968428001103e-05, "epoch": 0.3072378138847858, "percentage": 15.36, "elapsed_time": "0:13:03", "remaining_time": "1:11:59", "throughput": 13768.31, "total_tokens": 10794008}
|
230 |
+
{"current_steps": 1045, "total_steps": 6770, "loss": 0.4381, "lr": 9.705569334945921e-05, "epoch": 0.3087149187592319, "percentage": 15.44, "elapsed_time": "0:14:31", "remaining_time": "1:19:32", "throughput": 12450.04, "total_tokens": 10845736}
|
231 |
+
{"current_steps": 1050, "total_steps": 6770, "loss": 0.3991, "lr": 9.701426316501352e-05, "epoch": 0.310192023633678, "percentage": 15.51, "elapsed_time": "0:16:00", "remaining_time": "1:27:10", "throughput": 11349.91, "total_tokens": 10897528}
|
232 |
+
{"current_steps": 1050, "total_steps": 6770, "eval_loss": 0.4378110468387604, "epoch": 0.310192023633678, "percentage": 15.51, "elapsed_time": "0:16:47", "remaining_time": "1:31:27", "throughput": 10817.94, "total_tokens": 10897528}
|
training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:dfb4b4312b0dec56488d15e8a46a372ea451bb29daae5dc2f31b7c95a1a9b038
|
3 |
+
size 7480
|