Training in progress, step 1400, checkpoint
Browse files- last-checkpoint/adapter_model.safetensors +1 -1
- last-checkpoint/global_step1400/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step1400/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step1400/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step1400/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step1400/zero_pp_rank_0_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step1400/zero_pp_rank_1_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step1400/zero_pp_rank_2_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step1400/zero_pp_rank_3_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/latest +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +95 -6
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 18516456
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f2ea934ab728b1244cad904f211efc8dbea34bf120d0aaee69bc85b174ce604c
|
3 |
size 18516456
|
last-checkpoint/global_step1400/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:65a32506fe9a075e8bb36535dc8dbcdb5b1ce6a1e0d1997f92b88f46d29d5dd8
|
3 |
+
size 27700976
|
last-checkpoint/global_step1400/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9f4b2a134a207aac795fafdcc138df8fdf042c4133bd6620913e59b0d9c27457
|
3 |
+
size 27700976
|
last-checkpoint/global_step1400/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:15dd8165e977edcbd6994c5489318245c9ad54541455e3124e9c073749122323
|
3 |
+
size 27700976
|
last-checkpoint/global_step1400/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5ad19787732e8d8273fa1a687f45f2456da566d021f2d4115579ccd661a1028f
|
3 |
+
size 27700976
|
last-checkpoint/global_step1400/zero_pp_rank_0_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fec36d552bf6c107a3880a898ec20ae09651970c11b4a63ac3665580b5ffb88d
|
3 |
+
size 411571
|
last-checkpoint/global_step1400/zero_pp_rank_1_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1b9f5b60fd43cde7e00ab49e3793d827c723f499d77b9bf0439866dd5645ddd4
|
3 |
+
size 411507
|
last-checkpoint/global_step1400/zero_pp_rank_2_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:def6be980722c6b0e706f08920073db9d89e86c803748b7f3de1245f5b6687a6
|
3 |
+
size 411507
|
last-checkpoint/global_step1400/zero_pp_rank_3_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:80cdb09ea542cc0993daee7484026b223c5352c21c5c12b0593dbecbee4ea110
|
3 |
+
size 411507
|
last-checkpoint/latest
CHANGED
@@ -1 +1 @@
|
|
1 |
-
|
|
|
1 |
+
global_step1400
|
last-checkpoint/rng_state_0.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15024
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fe66a68e61de2221b30fd9749bc68b45a1474bb2cc95901bca9557ac87909355
|
3 |
size 15024
|
last-checkpoint/rng_state_1.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15024
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0cd4f3162e46c3bb0f1fc4d3c52c7c33e60f56764458e0c8a73c3810b0a25f8c
|
3 |
size 15024
|
last-checkpoint/rng_state_2.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15024
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:185cc99aaa81b1b49b3ddc74aa6f97aa3036330983a7b69d52bd191057f9a5d5
|
3 |
size 15024
|
last-checkpoint/rng_state_3.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15024
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0e37403c30cb4309e54e5defdde1906486716fc859274035d44aaac5d48a97ba
|
3 |
size 15024
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ba18a87eab7efca964506a3a18bdf9452eae09db2b3c77bd3a82db3283b5abc0
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
-
"best_metric": 0.
|
3 |
-
"best_model_checkpoint": "saves/CADICA_qwenvl_stenosis_classily_scale4_frozenVision/lora/sft/checkpoint-
|
4 |
-
"epoch": 0.
|
5 |
"eval_steps": 50,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -2410,11 +2410,100 @@
|
|
2410 |
"eval_steps_per_second": 0.933,
|
2411 |
"num_input_tokens_seen": 15789848,
|
2412 |
"step": 1350
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2413 |
}
|
2414 |
],
|
2415 |
"logging_steps": 5,
|
2416 |
"max_steps": 3400,
|
2417 |
-
"num_input_tokens_seen":
|
2418 |
"num_train_epochs": 2,
|
2419 |
"save_steps": 50,
|
2420 |
"stateful_callbacks": {
|
@@ -2429,7 +2518,7 @@
|
|
2429 |
"attributes": {}
|
2430 |
}
|
2431 |
},
|
2432 |
-
"total_flos":
|
2433 |
"train_batch_size": 1,
|
2434 |
"trial_name": null,
|
2435 |
"trial_params": null
|
|
|
1 |
{
|
2 |
+
"best_metric": 0.6643603444099426,
|
3 |
+
"best_model_checkpoint": "saves/CADICA_qwenvl_stenosis_classily_scale4_frozenVision/lora/sft/checkpoint-1400",
|
4 |
+
"epoch": 0.7210919392222508,
|
5 |
"eval_steps": 50,
|
6 |
+
"global_step": 1400,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
2410 |
"eval_steps_per_second": 0.933,
|
2411 |
"num_input_tokens_seen": 15789848,
|
2412 |
"step": 1350
|
2413 |
+
},
|
2414 |
+
{
|
2415 |
+
"epoch": 0.6979139840329642,
|
2416 |
+
"grad_norm": 2.128495144345323,
|
2417 |
+
"learning_rate": 7.030720984447279e-05,
|
2418 |
+
"loss": 0.7005,
|
2419 |
+
"num_input_tokens_seen": 15848328,
|
2420 |
+
"step": 1355
|
2421 |
+
},
|
2422 |
+
{
|
2423 |
+
"epoch": 0.7004893123873294,
|
2424 |
+
"grad_norm": 1.9954206386005497,
|
2425 |
+
"learning_rate": 7.008477123264848e-05,
|
2426 |
+
"loss": 0.7406,
|
2427 |
+
"num_input_tokens_seen": 15906824,
|
2428 |
+
"step": 1360
|
2429 |
+
},
|
2430 |
+
{
|
2431 |
+
"epoch": 0.7030646407416946,
|
2432 |
+
"grad_norm": 2.2104679425901397,
|
2433 |
+
"learning_rate": 6.986185761302224e-05,
|
2434 |
+
"loss": 0.73,
|
2435 |
+
"num_input_tokens_seen": 15965312,
|
2436 |
+
"step": 1365
|
2437 |
+
},
|
2438 |
+
{
|
2439 |
+
"epoch": 0.7056399690960597,
|
2440 |
+
"grad_norm": 1.4881688553415275,
|
2441 |
+
"learning_rate": 6.963847425753403e-05,
|
2442 |
+
"loss": 0.7069,
|
2443 |
+
"num_input_tokens_seen": 16023824,
|
2444 |
+
"step": 1370
|
2445 |
+
},
|
2446 |
+
{
|
2447 |
+
"epoch": 0.7082152974504249,
|
2448 |
+
"grad_norm": 1.7307886623214839,
|
2449 |
+
"learning_rate": 6.941462644923318e-05,
|
2450 |
+
"loss": 0.6859,
|
2451 |
+
"num_input_tokens_seen": 16082280,
|
2452 |
+
"step": 1375
|
2453 |
+
},
|
2454 |
+
{
|
2455 |
+
"epoch": 0.7107906258047901,
|
2456 |
+
"grad_norm": 1.996363722225207,
|
2457 |
+
"learning_rate": 6.919031948215335e-05,
|
2458 |
+
"loss": 0.7254,
|
2459 |
+
"num_input_tokens_seen": 16140800,
|
2460 |
+
"step": 1380
|
2461 |
+
},
|
2462 |
+
{
|
2463 |
+
"epoch": 0.7133659541591553,
|
2464 |
+
"grad_norm": 1.9723274395570518,
|
2465 |
+
"learning_rate": 6.896555866118741e-05,
|
2466 |
+
"loss": 0.717,
|
2467 |
+
"num_input_tokens_seen": 16199320,
|
2468 |
+
"step": 1385
|
2469 |
+
},
|
2470 |
+
{
|
2471 |
+
"epoch": 0.7159412825135205,
|
2472 |
+
"grad_norm": 1.741253496639104,
|
2473 |
+
"learning_rate": 6.87403493019619e-05,
|
2474 |
+
"loss": 0.7094,
|
2475 |
+
"num_input_tokens_seen": 16257768,
|
2476 |
+
"step": 1390
|
2477 |
+
},
|
2478 |
+
{
|
2479 |
+
"epoch": 0.7185166108678857,
|
2480 |
+
"grad_norm": 1.6218002074106608,
|
2481 |
+
"learning_rate": 6.851469673071143e-05,
|
2482 |
+
"loss": 0.7862,
|
2483 |
+
"num_input_tokens_seen": 16316264,
|
2484 |
+
"step": 1395
|
2485 |
+
},
|
2486 |
+
{
|
2487 |
+
"epoch": 0.7210919392222508,
|
2488 |
+
"grad_norm": 1.7586707307941614,
|
2489 |
+
"learning_rate": 6.828860628415253e-05,
|
2490 |
+
"loss": 0.7359,
|
2491 |
+
"num_input_tokens_seen": 16374784,
|
2492 |
+
"step": 1400
|
2493 |
+
},
|
2494 |
+
{
|
2495 |
+
"epoch": 0.7210919392222508,
|
2496 |
+
"eval_loss": 0.6643603444099426,
|
2497 |
+
"eval_runtime": 16.1894,
|
2498 |
+
"eval_samples_per_second": 3.706,
|
2499 |
+
"eval_steps_per_second": 0.927,
|
2500 |
+
"num_input_tokens_seen": 16374784,
|
2501 |
+
"step": 1400
|
2502 |
}
|
2503 |
],
|
2504 |
"logging_steps": 5,
|
2505 |
"max_steps": 3400,
|
2506 |
+
"num_input_tokens_seen": 16374784,
|
2507 |
"num_train_epochs": 2,
|
2508 |
"save_steps": 50,
|
2509 |
"stateful_callbacks": {
|
|
|
2518 |
"attributes": {}
|
2519 |
}
|
2520 |
},
|
2521 |
+
"total_flos": 919473251942400.0,
|
2522 |
"train_batch_size": 1,
|
2523 |
"trial_name": null,
|
2524 |
"trial_params": null
|