Training in progress, step 1201
Browse files- adapter_config.json +1 -1
- adapter_model.safetensors +1 -1
- trainer_log.jsonl +5 -0
- training_args.bin +2 -2
adapter_config.json
CHANGED
@@ -19,7 +19,7 @@
|
|
19 |
"r": 8,
|
20 |
"rank_pattern": {},
|
21 |
"revision": null,
|
22 |
-
"target_modules": "^(?!.*patch_embed).*(?:
|
23 |
"task_type": "CAUSAL_LM",
|
24 |
"use_dora": false,
|
25 |
"use_rslora": false
|
|
|
19 |
"r": 8,
|
20 |
"rank_pattern": {},
|
21 |
"revision": null,
|
22 |
+
"target_modules": "^(?!.*patch_embed).*(?:gate_proj|qkv|q_proj|fc1|fc2|up_proj|v_proj|down_proj|k_proj|proj|o_proj).*",
|
23 |
"task_type": "CAUSAL_LM",
|
24 |
"use_dora": false,
|
25 |
"use_rslora": false
|
adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 29034840
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c8ee2634589442880539cea4215aeae5e195f22dea93b6441497472981b36691
|
3 |
size 29034840
|
trainer_log.jsonl
CHANGED
@@ -262,3 +262,8 @@
|
|
262 |
{"current_steps": 1195, "total_steps": 3400, "loss": 0.0007, "lr": 7.714352216101055e-05, "epoch": 0.6155034766932784, "percentage": 35.15, "elapsed_time": "5:54:05", "remaining_time": "10:53:21", "throughput": 561.58, "total_tokens": 11930880}
|
263 |
{"current_steps": 1200, "total_steps": 3400, "loss": 0.0038, "lr": 7.693899446759727e-05, "epoch": 0.6180788050476436, "percentage": 35.29, "elapsed_time": "5:55:30", "remaining_time": "10:51:46", "throughput": 561.67, "total_tokens": 11980800}
|
264 |
{"current_steps": 1200, "total_steps": 3400, "eval_loss": 0.0022160401567816734, "epoch": 0.6180788050476436, "percentage": 35.29, "elapsed_time": "5:55:49", "remaining_time": "10:52:20", "throughput": 561.17, "total_tokens": 11980800}
|
|
|
|
|
|
|
|
|
|
|
|
262 |
{"current_steps": 1195, "total_steps": 3400, "loss": 0.0007, "lr": 7.714352216101055e-05, "epoch": 0.6155034766932784, "percentage": 35.15, "elapsed_time": "5:54:05", "remaining_time": "10:53:21", "throughput": 561.58, "total_tokens": 11930880}
|
263 |
{"current_steps": 1200, "total_steps": 3400, "loss": 0.0038, "lr": 7.693899446759727e-05, "epoch": 0.6180788050476436, "percentage": 35.29, "elapsed_time": "5:55:30", "remaining_time": "10:51:46", "throughput": 561.67, "total_tokens": 11980800}
|
264 |
{"current_steps": 1200, "total_steps": 3400, "eval_loss": 0.0022160401567816734, "epoch": 0.6180788050476436, "percentage": 35.29, "elapsed_time": "5:55:49", "remaining_time": "10:52:20", "throughput": 561.17, "total_tokens": 11980800}
|
265 |
+
{"current_steps": 1205, "total_steps": 3400, "loss": 0.0002, "lr": 7.673382966299163e-05, "epoch": 0.6206541334020087, "percentage": 35.44, "elapsed_time": "5:57:20", "remaining_time": "10:50:55", "throughput": 561.12, "total_tokens": 12030720}
|
266 |
+
{"current_steps": 1210, "total_steps": 3400, "loss": 0.0262, "lr": 7.65280325993715e-05, "epoch": 0.623229461756374, "percentage": 35.59, "elapsed_time": "5:58:47", "remaining_time": "10:49:22", "throughput": 561.18, "total_tokens": 12080640}
|
267 |
+
{"current_steps": 1215, "total_steps": 3400, "loss": 0.0403, "lr": 7.63216081438678e-05, "epoch": 0.6258047901107391, "percentage": 35.74, "elapsed_time": "6:00:13", "remaining_time": "10:47:48", "throughput": 561.25, "total_tokens": 12130560}
|
268 |
+
{"current_steps": 1220, "total_steps": 3400, "loss": 0.002, "lr": 7.611456117844934e-05, "epoch": 0.6283801184651043, "percentage": 35.88, "elapsed_time": "6:01:39", "remaining_time": "10:46:14", "throughput": 561.32, "total_tokens": 12180480}
|
269 |
+
{"current_steps": 1201, "total_steps": 1200, "epoch": 0.6185938707185166, "percentage": 100.08, "elapsed_time": "0:01:42", "remaining_time": "0:00:00", "throughput": 117150.6, "total_tokens": 11990784}
|
training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d805df10b9aa9257d71d396f9c297d6e4b0be2e8da8eecef0b7efe313a60a238
|
3 |
+
size 7416
|