Training in progress, step 650

- adapter_config.json +1 -1
- adapter_model.safetensors +1 -1
- tokenizer_config.json +0 -1
- trainer_log.jsonl +15 -0
- training_args.bin +2 -2
adapter_config.json
CHANGED
@@ -19,7 +19,7 @@
     "r": 8,
     "rank_pattern": {},
     "revision": null,
-    "target_modules": "^(?!.*patch_embed).*(?:
+    "target_modules": "^(?!.*patch_embed).*(?:up_proj|proj|fc2|fc1|qkv|o_proj|k_proj|gate_proj|v_proj|down_proj|q_proj).*",
     "task_type": "CAUSAL_LM",
     "use_dora": false,
     "use_rslora": false
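For context, a target-module pattern like the one added above can be passed to PEFT's LoraConfig as a single string, which PEFT matches as a regular expression against module names, so every projection layer outside the vision patch embedding is adapted. A minimal sketch of how such a config could be reproduced (this is an illustration, not the exact training script behind this commit):

from peft import LoraConfig

# Rank-8 LoRA; the regex skips any module whose name contains "patch_embed"
# and targets the listed projection/MLP layers everywhere else.
lora_config = LoraConfig(
    r=8,
    target_modules=(
        "^(?!.*patch_embed).*"
        "(?:up_proj|proj|fc2|fc1|qkv|o_proj|k_proj|gate_proj|v_proj|down_proj|q_proj).*"
    ),
    task_type="CAUSAL_LM",
    use_dora=False,
    use_rslora=False,
)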
adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:a040fdacc28b9de2977636cd1c1956bffb7f97df44e5bd2640662f6a98fa674b
 size 29034840
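The binary files in this commit are stored as Git LFS pointers, so the oid line is the SHA-256 of the actual payload and can be used to verify a downloaded checkpoint. A minimal sketch, assuming the real file has already been fetched (e.g. with git lfs pull) into the working directory:

import hashlib

ADAPTER_PATH = "adapter_model.safetensors"  # assumed local path
EXPECTED_OID = "a040fdacc28b9de2977636cd1c1956bffb7f97df44e5bd2640662f6a98fa674b"

def sha256_of(path: str, chunk_size: int = 1 << 20) -> str:
    # Stream the file in chunks so large checkpoints need not fit in memory.
    digest = hashlib.sha256()
    with open(path, "rb") as f:
        while chunk := f.read(chunk_size):
            digest.update(chunk)
    return digest.hexdigest()

assert sha256_of(ADAPTER_PATH) == EXPECTED_OID, "file does not match its LFS pointer"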
tokenizer_config.json
CHANGED
@@ -137,7 +137,6 @@
     "model_max_length": 32768,
     "pad_token": "<|endoftext|>",
     "padding_side": "right",
-    "processor_class": "Qwen2VLProcessor",
     "split_special_tokens": false,
     "tokenizer_class": "Qwen2Tokenizer",
     "unk_token": null
trainer_log.jsonl
CHANGED
@@ -130,3 +130,18 @@
 {"current_steps": 595, "total_steps": 3400, "loss": 0.5644, "lr": 9.578866633275288e-05, "epoch": 0.1532320370847283, "percentage": 17.5, "elapsed_time": "3:04:25", "remaining_time": "14:29:26", "throughput": 564.6, "total_tokens": 6247592}
 {"current_steps": 600, "total_steps": 3400, "loss": 0.5794, "lr": 9.569045007802559e-05, "epoch": 0.1545197012619109, "percentage": 17.65, "elapsed_time": "3:05:53", "remaining_time": "14:27:29", "throughput": 564.81, "total_tokens": 6299656}
 {"current_steps": 600, "total_steps": 3400, "eval_loss": 0.6039358973503113, "epoch": 0.1545197012619109, "percentage": 17.65, "elapsed_time": "3:06:31", "remaining_time": "14:30:28", "throughput": 562.88, "total_tokens": 6299656}
+{"current_steps": 605, "total_steps": 3400, "loss": 0.6032, "lr": 9.55911532374151e-05, "epoch": 0.1558073654390935, "percentage": 17.79, "elapsed_time": "3:08:04", "remaining_time": "14:28:50", "throughput": 562.89, "total_tokens": 6351664}
+{"current_steps": 610, "total_steps": 3400, "loss": 0.5942, "lr": 9.549077815930636e-05, "epoch": 0.15709502961627608, "percentage": 17.94, "elapsed_time": "3:09:32", "remaining_time": "14:26:53", "throughput": 563.1, "total_tokens": 6403688}
+{"current_steps": 615, "total_steps": 3400, "loss": 0.5643, "lr": 9.538932721758474e-05, "epoch": 0.15838269379345868, "percentage": 18.09, "elapsed_time": "3:10:58", "remaining_time": "14:24:50", "throughput": 563.44, "total_tokens": 6456344}
+{"current_steps": 620, "total_steps": 3400, "loss": 0.5914, "lr": 9.528680281157999e-05, "epoch": 0.15967035797064125, "percentage": 18.24, "elapsed_time": "3:12:26", "remaining_time": "14:22:51", "throughput": 563.74, "total_tokens": 6509000}
+{"current_steps": 605, "total_steps": 3400, "loss": 0.6106, "lr": 9.55911532374151e-05, "epoch": 0.1558073654390935, "percentage": 17.79, "elapsed_time": "0:02:45", "remaining_time": "0:12:42", "throughput": 38471.59, "total_tokens": 6351680}
+{"current_steps": 610, "total_steps": 3400, "loss": 0.5812, "lr": 9.549077815930636e-05, "epoch": 0.15709502961627608, "percentage": 17.94, "elapsed_time": "0:04:14", "remaining_time": "0:19:21", "throughput": 25207.5, "total_tokens": 6403648}
+{"current_steps": 615, "total_steps": 3400, "loss": 0.5992, "lr": 9.538932721758474e-05, "epoch": 0.15838269379345868, "percentage": 18.09, "elapsed_time": "0:05:41", "remaining_time": "0:25:47", "throughput": 18896.22, "total_tokens": 6456328}
+{"current_steps": 620, "total_steps": 3400, "loss": 0.587, "lr": 9.528680281157999e-05, "epoch": 0.15967035797064125, "percentage": 18.24, "elapsed_time": "0:07:10", "remaining_time": "0:32:11", "throughput": 15113.22, "total_tokens": 6509024}
+{"current_steps": 625, "total_steps": 3400, "loss": 0.5836, "lr": 9.518320736600943e-05, "epoch": 0.16095802214782384, "percentage": 18.38, "elapsed_time": "0:08:38", "remaining_time": "0:38:21", "throughput": 12658.21, "total_tokens": 6561336}
+{"current_steps": 630, "total_steps": 3400, "loss": 0.5913, "lr": 9.507854333092063e-05, "epoch": 0.16224568632500644, "percentage": 18.53, "elapsed_time": "0:10:07", "remaining_time": "0:44:29", "throughput": 10895.64, "total_tokens": 6614024}
+{"current_steps": 635, "total_steps": 3400, "loss": 0.5693, "lr": 9.497281318163346e-05, "epoch": 0.16353335050218903, "percentage": 18.68, "elapsed_time": "0:11:34", "remaining_time": "0:50:24", "throughput": 9595.98, "total_tokens": 6666416}
+{"current_steps": 640, "total_steps": 3400, "loss": 0.572, "lr": 9.486601941868154e-05, "epoch": 0.16482101467937163, "percentage": 18.82, "elapsed_time": "0:13:04", "remaining_time": "0:56:24", "throughput": 8560.2, "total_tokens": 6718200}
+{"current_steps": 645, "total_steps": 3400, "loss": 0.6111, "lr": 9.475816456775313e-05, "epoch": 0.1661086788565542, "percentage": 18.97, "elapsed_time": "0:14:35", "remaining_time": "1:02:19", "throughput": 7734.51, "total_tokens": 6771256}
+{"current_steps": 650, "total_steps": 3400, "loss": 0.5959, "lr": 9.464925117963133e-05, "epoch": 0.1673963430337368, "percentage": 19.12, "elapsed_time": "0:16:07", "remaining_time": "1:08:11", "throughput": 7056.83, "total_tokens": 6824008}
+{"current_steps": 650, "total_steps": 3400, "eval_loss": 0.5542036890983582, "epoch": 0.1673963430337368, "percentage": 19.12, "elapsed_time": "0:17:15", "remaining_time": "1:13:02", "throughput": 6587.43, "total_tokens": 6824008}
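Each line of trainer_log.jsonl is a standalone JSON object, so the loss curve can be inspected with a few lines of Python. Note that steps 605 through 620 appear twice among the added lines with reset elapsed_time and throughput values, consistent with the run being resumed from a checkpoint, so a simple reader may want to keep only the last record per step. A minimal sketch, assuming a local copy of the log file:

import json

records = {}
with open("trainer_log.jsonl", "r", encoding="utf-8") as f:
    for line in f:
        entry = json.loads(line)
        # Keep the last record per (step, kind) so duplicated steps from a
        # resumed run overwrite the earlier entries.
        kind = "eval" if "eval_loss" in entry else "train"
        records[(entry["current_steps"], kind)] = entry

for (step, kind), entry in sorted(records.items()):
    metric = entry.get("eval_loss", entry.get("loss"))
    print(f"step {step:>5} [{kind}] loss={metric}")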
training_args.bin
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:9b2ceb98c0b4a50d3909c4f866386ff0e5093b24fd71a2054110090af7b4ef0e
+size 7416
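training_args.bin is a small (7416-byte) pickled TrainingArguments object rather than a tensor file, so it can be unpickled to inspect the run's hyperparameters when a compatible transformers version is installed. A sketch under those assumptions; because it is a pickle, weights_only=False is required and should only be used for repos you trust:

import torch

# Unpickling needs the transformers classes that produced the object.
args = torch.load("training_args.bin", weights_only=False)
print(args.learning_rate, args.num_train_epochs, args.per_device_train_batch_size)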