Training in progress, step 1700
Browse files- adapter_config.json +1 -1
- adapter_model.safetensors +1 -1
- trainer_log.jsonl +22 -0
- training_args.bin +1 -1
adapter_config.json
CHANGED
@@ -19,7 +19,7 @@
|
|
19 |
"r": 8,
|
20 |
"rank_pattern": {},
|
21 |
"revision": null,
|
22 |
-
"target_modules": "^(?!.*patch_embed).*(?:
|
23 |
"task_type": "CAUSAL_LM",
|
24 |
"use_dora": false,
|
25 |
"use_rslora": false
|
|
|
19 |
"r": 8,
|
20 |
"rank_pattern": {},
|
21 |
"revision": null,
|
22 |
+
"target_modules": "^(?!.*patch_embed).*(?:q_proj|qkv|gate_proj|fc1|up_proj|k_proj|o_proj|fc2|down_proj|v_proj|proj).*",
|
23 |
"task_type": "CAUSAL_LM",
|
24 |
"use_dora": false,
|
25 |
"use_rslora": false
|
adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 29034840
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2509fd541c293692844871c3c3ef3462376501768f133b9acc8bdb3a34cf0369
|
3 |
size 29034840
|
trainer_log.jsonl
CHANGED
@@ -362,3 +362,25 @@
|
|
362 |
{"current_steps": 1645, "total_steps": 6770, "loss": 0.326, "lr": 9.016469573730869e-05, "epoch": 0.4859675036927622, "percentage": 24.3, "elapsed_time": "3:14:27", "remaining_time": "10:05:49", "throughput": 1463.76, "total_tokens": 17077904}
|
363 |
{"current_steps": 1650, "total_steps": 6770, "loss": 0.2698, "lr": 9.009183930952836e-05, "epoch": 0.4874446085672083, "percentage": 24.37, "elapsed_time": "3:15:53", "remaining_time": "10:07:50", "throughput": 1457.55, "total_tokens": 17130896}
|
364 |
{"current_steps": 1650, "total_steps": 6770, "eval_loss": 0.40201568603515625, "epoch": 0.4874446085672083, "percentage": 24.37, "elapsed_time": "3:16:12", "remaining_time": "10:08:51", "throughput": 1455.14, "total_tokens": 17130896}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
362 |
{"current_steps": 1645, "total_steps": 6770, "loss": 0.326, "lr": 9.016469573730869e-05, "epoch": 0.4859675036927622, "percentage": 24.3, "elapsed_time": "3:14:27", "remaining_time": "10:05:49", "throughput": 1463.76, "total_tokens": 17077904}
|
363 |
{"current_steps": 1650, "total_steps": 6770, "loss": 0.2698, "lr": 9.009183930952836e-05, "epoch": 0.4874446085672083, "percentage": 24.37, "elapsed_time": "3:15:53", "remaining_time": "10:07:50", "throughput": 1457.55, "total_tokens": 17130896}
|
364 |
{"current_steps": 1650, "total_steps": 6770, "eval_loss": 0.40201568603515625, "epoch": 0.4874446085672083, "percentage": 24.37, "elapsed_time": "3:16:12", "remaining_time": "10:08:51", "throughput": 1455.14, "total_tokens": 17130896}
|
365 |
+
{"current_steps": 1655, "total_steps": 6770, "loss": 0.2956, "lr": 9.00187436942368e-05, "epoch": 0.48892171344165436, "percentage": 24.45, "elapsed_time": "3:17:46", "remaining_time": "10:11:15", "throughput": 1448.0, "total_tokens": 17182896}
|
366 |
+
{"current_steps": 1660, "total_steps": 6770, "loss": 0.3027, "lr": 8.994540932752167e-05, "epoch": 0.49039881831610044, "percentage": 24.52, "elapsed_time": "3:19:14", "remaining_time": "10:13:19", "throughput": 1441.79, "total_tokens": 17235552}
|
367 |
+
{"current_steps": 1665, "total_steps": 6770, "loss": 0.3295, "lr": 8.987183664689511e-05, "epoch": 0.4918759231905465, "percentage": 24.59, "elapsed_time": "3:20:43", "remaining_time": "10:15:25", "throughput": 1435.39, "total_tokens": 17286816}
|
368 |
+
{"current_steps": 1670, "total_steps": 6770, "loss": 0.3201, "lr": 8.9798026091291e-05, "epoch": 0.4933530280649926, "percentage": 24.67, "elapsed_time": "3:22:11", "remaining_time": "10:17:28", "throughput": 1429.24, "total_tokens": 17339072}
|
369 |
+
{"current_steps": 1675, "total_steps": 6770, "loss": 0.3044, "lr": 8.972397810106235e-05, "epoch": 0.4948301329394387, "percentage": 24.74, "elapsed_time": "3:23:40", "remaining_time": "10:19:31", "throughput": 1423.15, "total_tokens": 17391288}
|
370 |
+
{"current_steps": 1680, "total_steps": 6770, "loss": 0.2781, "lr": 8.964969311797871e-05, "epoch": 0.4963072378138848, "percentage": 24.82, "elapsed_time": "3:25:07", "remaining_time": "10:21:28", "throughput": 1417.32, "total_tokens": 17443456}
|
371 |
+
{"current_steps": 1685, "total_steps": 6770, "loss": 0.423, "lr": 8.957517158522359e-05, "epoch": 0.4977843426883309, "percentage": 24.89, "elapsed_time": "3:26:34", "remaining_time": "10:23:23", "throughput": 1411.54, "total_tokens": 17494832}
|
372 |
+
{"current_steps": 1690, "total_steps": 6770, "loss": 0.2747, "lr": 8.950041394739168e-05, "epoch": 0.49926144756277696, "percentage": 24.96, "elapsed_time": "3:28:01", "remaining_time": "10:25:19", "throughput": 1405.84, "total_tokens": 17547384}
|
373 |
+
{"current_steps": 1695, "total_steps": 6770, "loss": 0.3162, "lr": 8.942542065048632e-05, "epoch": 0.5007385524372231, "percentage": 25.04, "elapsed_time": "3:29:30", "remaining_time": "10:27:15", "throughput": 1400.09, "total_tokens": 17599120}
|
374 |
+
{"current_steps": 1700, "total_steps": 6770, "loss": 0.3904, "lr": 8.935019214191672e-05, "epoch": 0.5022156573116692, "percentage": 25.11, "elapsed_time": "3:30:56", "remaining_time": "10:29:06", "throughput": 1394.6, "total_tokens": 17650984}
|
375 |
+
{"current_steps": 1700, "total_steps": 6770, "eval_loss": 0.3297054171562195, "epoch": 0.5022156573116692, "percentage": 25.11, "elapsed_time": "3:31:15", "remaining_time": "10:30:04", "throughput": 1392.48, "total_tokens": 17650984}
|
376 |
+
{"current_steps": 1655, "total_steps": 6770, "loss": 0.2956, "lr": 9.00187436942368e-05, "epoch": 0.48892171344165436, "percentage": 24.45, "elapsed_time": "0:02:46", "remaining_time": "0:08:35", "throughput": 103093.65, "total_tokens": 17182896}
|
377 |
+
{"current_steps": 1660, "total_steps": 6770, "loss": 0.3027, "lr": 8.994540932752167e-05, "epoch": 0.49039881831610044, "percentage": 24.52, "elapsed_time": "0:04:14", "remaining_time": "0:13:02", "throughput": 67768.5, "total_tokens": 17235552}
|
378 |
+
{"current_steps": 1665, "total_steps": 6770, "loss": 0.3295, "lr": 8.987183664689511e-05, "epoch": 0.4918759231905465, "percentage": 24.59, "elapsed_time": "0:05:40", "remaining_time": "0:17:24", "throughput": 50751.01, "total_tokens": 17286816}
|
379 |
+
{"current_steps": 1670, "total_steps": 6770, "loss": 0.3201, "lr": 8.9798026091291e-05, "epoch": 0.4933530280649926, "percentage": 24.67, "elapsed_time": "0:07:08", "remaining_time": "0:21:47", "throughput": 40490.2, "total_tokens": 17339072}
|
380 |
+
{"current_steps": 1675, "total_steps": 6770, "loss": 0.3044, "lr": 8.972397810106235e-05, "epoch": 0.4948301329394387, "percentage": 24.74, "elapsed_time": "0:08:34", "remaining_time": "0:26:04", "throughput": 33807.84, "total_tokens": 17391288}
|
381 |
+
{"current_steps": 1680, "total_steps": 6770, "loss": 0.2781, "lr": 8.964969311797871e-05, "epoch": 0.4963072378138848, "percentage": 24.82, "elapsed_time": "0:10:01", "remaining_time": "0:30:21", "throughput": 29020.42, "total_tokens": 17443456}
|
382 |
+
{"current_steps": 1685, "total_steps": 6770, "loss": 0.423, "lr": 8.957517158522359e-05, "epoch": 0.4977843426883309, "percentage": 24.89, "elapsed_time": "0:11:26", "remaining_time": "0:34:31", "throughput": 25485.2, "total_tokens": 17494832}
|
383 |
+
{"current_steps": 1690, "total_steps": 6770, "loss": 0.2747, "lr": 8.950041394739168e-05, "epoch": 0.49926144756277696, "percentage": 24.96, "elapsed_time": "0:12:53", "remaining_time": "0:38:45", "throughput": 22678.11, "total_tokens": 17547384}
|
384 |
+
{"current_steps": 1695, "total_steps": 6770, "loss": 0.3162, "lr": 8.942542065048632e-05, "epoch": 0.5007385524372231, "percentage": 25.04, "elapsed_time": "0:14:20", "remaining_time": "0:42:55", "throughput": 20457.93, "total_tokens": 17599120}
|
385 |
+
{"current_steps": 1700, "total_steps": 6770, "loss": 0.3904, "lr": 8.935019214191672e-05, "epoch": 0.5022156573116692, "percentage": 25.11, "elapsed_time": "0:15:49", "remaining_time": "0:47:11", "throughput": 18591.57, "total_tokens": 17650984}
|
386 |
+
{"current_steps": 1700, "total_steps": 6770, "eval_loss": 0.3297054171562195, "epoch": 0.5022156573116692, "percentage": 25.11, "elapsed_time": "0:16:37", "remaining_time": "0:49:34", "throughput": 17697.21, "total_tokens": 17650984}
|
training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 7480
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:157f40a64e5df8b268b803366ba3fce3e3b5e6e2f20d7c81bfc44726f0bee4e8
|
3 |
size 7480
|