Training in progress, step 2600
Browse files- adapter_config.json +1 -1
- adapter_model.safetensors +1 -1
- tokenizer_config.json +0 -1
- trainer_log.jsonl +10 -0
- training_args.bin +1 -1
adapter_config.json
CHANGED
@@ -19,7 +19,7 @@
|
|
19 |
"r": 8,
|
20 |
"rank_pattern": {},
|
21 |
"revision": null,
|
22 |
-
"target_modules": "^(?!.*patch_embed).*(?:
|
23 |
"task_type": "CAUSAL_LM",
|
24 |
"use_dora": false,
|
25 |
"use_rslora": false
|
|
|
19 |
"r": 8,
|
20 |
"rank_pattern": {},
|
21 |
"revision": null,
|
22 |
+
"target_modules": "^(?!.*patch_embed).*(?:q_proj|proj|k_proj|v_proj|gate_proj|qkv|fc1|down_proj|up_proj|fc2|o_proj).*",
|
23 |
"task_type": "CAUSAL_LM",
|
24 |
"use_dora": false,
|
25 |
"use_rslora": false
|
adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 29034840
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f9418bf839b17997d0032898161f489316f534597155456a79fb57e1a059c9d3
|
3 |
size 29034840
|
tokenizer_config.json
CHANGED
@@ -137,7 +137,6 @@
|
|
137 |
"model_max_length": 32768,
|
138 |
"pad_token": "<|endoftext|>",
|
139 |
"padding_side": "right",
|
140 |
-
"processor_class": "Qwen2VLProcessor",
|
141 |
"split_special_tokens": false,
|
142 |
"tokenizer_class": "Qwen2Tokenizer",
|
143 |
"unk_token": null
|
|
|
137 |
"model_max_length": 32768,
|
138 |
"pad_token": "<|endoftext|>",
|
139 |
"padding_side": "right",
|
|
|
140 |
"split_special_tokens": false,
|
141 |
"tokenizer_class": "Qwen2Tokenizer",
|
142 |
"unk_token": null
|
trainer_log.jsonl
CHANGED
@@ -563,3 +563,13 @@
|
|
563 |
{"current_steps": 2545, "total_steps": 3400, "loss": 0.3622, "lr": 1.631521781767214e-05, "epoch": 0.6554210661859388, "percentage": 74.85, "elapsed_time": "10:01:25", "remaining_time": "3:22:03", "throughput": 740.58, "total_tokens": 26724488}
|
564 |
{"current_steps": 2550, "total_steps": 3400, "loss": 0.3195, "lr": 1.6135921418712956e-05, "epoch": 0.6567087303631213, "percentage": 75.0, "elapsed_time": "10:02:54", "remaining_time": "3:20:58", "throughput": 740.22, "total_tokens": 26776816}
|
565 |
{"current_steps": 2550, "total_steps": 3400, "eval_loss": 0.43731561303138733, "epoch": 0.6567087303631213, "percentage": 75.0, "elapsed_time": "10:03:32", "remaining_time": "3:21:10", "throughput": 739.43, "total_tokens": 26776816}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
563 |
{"current_steps": 2545, "total_steps": 3400, "loss": 0.3622, "lr": 1.631521781767214e-05, "epoch": 0.6554210661859388, "percentage": 74.85, "elapsed_time": "10:01:25", "remaining_time": "3:22:03", "throughput": 740.58, "total_tokens": 26724488}
|
564 |
{"current_steps": 2550, "total_steps": 3400, "loss": 0.3195, "lr": 1.6135921418712956e-05, "epoch": 0.6567087303631213, "percentage": 75.0, "elapsed_time": "10:02:54", "remaining_time": "3:20:58", "throughput": 740.22, "total_tokens": 26776816}
|
565 |
{"current_steps": 2550, "total_steps": 3400, "eval_loss": 0.43731561303138733, "epoch": 0.6567087303631213, "percentage": 75.0, "elapsed_time": "10:03:32", "remaining_time": "3:21:10", "throughput": 739.43, "total_tokens": 26776816}
|
566 |
+
{"current_steps": 2560, "total_steps": 3400, "loss": 0.281, "lr": 1.577973551359877e-05, "epoch": 0.6592840587174865, "percentage": 75.29, "elapsed_time": "0:04:12", "remaining_time": "0:01:22", "throughput": 106495.52, "total_tokens": 26881272}
|
567 |
+
{"current_steps": 2565, "total_steps": 3400, "loss": 0.2814, "lr": 1.560285443129296e-05, "epoch": 0.660571722894669, "percentage": 75.44, "elapsed_time": "0:05:39", "remaining_time": "0:01:50", "throughput": 79391.53, "total_tokens": 26934104}
|
568 |
+
{"current_steps": 2570, "total_steps": 3400, "loss": 0.2602, "lr": 1.542678684655306e-05, "epoch": 0.6618593870718517, "percentage": 75.59, "elapsed_time": "0:07:06", "remaining_time": "0:02:17", "throughput": 63238.19, "total_tokens": 26986248}
|
569 |
+
{"current_steps": 2575, "total_steps": 3400, "loss": 0.336, "lr": 1.5251536923403426e-05, "epoch": 0.6631470512490343, "percentage": 75.74, "elapsed_time": "0:08:33", "remaining_time": "0:02:44", "throughput": 52682.1, "total_tokens": 27038528}
|
570 |
+
{"current_steps": 2580, "total_steps": 3400, "loss": 0.2867, "lr": 1.5077108806530581e-05, "epoch": 0.6644347154262168, "percentage": 75.88, "elapsed_time": "0:10:01", "remaining_time": "0:03:11", "throughput": 45068.81, "total_tokens": 27090792}
|
571 |
+
{"current_steps": 2585, "total_steps": 3400, "loss": 0.2898, "lr": 1.4903506621185192e-05, "epoch": 0.6657223796033994, "percentage": 76.03, "elapsed_time": "0:11:28", "remaining_time": "0:03:36", "throughput": 39447.58, "total_tokens": 27143544}
|
572 |
+
{"current_steps": 2590, "total_steps": 3400, "loss": 0.2955, "lr": 1.4730734473084568e-05, "epoch": 0.667010043780582, "percentage": 76.18, "elapsed_time": "0:12:56", "remaining_time": "0:04:02", "throughput": 35030.09, "total_tokens": 27195632}
|
573 |
+
{"current_steps": 2595, "total_steps": 3400, "loss": 0.281, "lr": 1.4558796448315504e-05, "epoch": 0.6682977079577647, "percentage": 76.32, "elapsed_time": "0:14:22", "remaining_time": "0:04:27", "throughput": 31584.55, "total_tokens": 27248472}
|
574 |
+
{"current_steps": 2600, "total_steps": 3400, "loss": 0.3075, "lr": 1.4387696613237612e-05, "epoch": 0.6695853721349472, "percentage": 76.47, "elapsed_time": "0:15:50", "remaining_time": "0:04:52", "throughput": 28726.52, "total_tokens": 27301776}
|
575 |
+
{"current_steps": 2600, "total_steps": 3400, "eval_loss": 0.4504788815975189, "epoch": 0.6695853721349472, "percentage": 76.47, "elapsed_time": "0:16:55", "remaining_time": "0:05:12", "throughput": 26882.73, "total_tokens": 27301776}
|
training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 7416
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9ed8b409e805b958140d9c8d6cad43f19d2d8518b89641bfc2a345391a819bfe
|
3 |
size 7416
|