Training in progress, step 300, checkpoint
Browse files- last-checkpoint/adapter_model.safetensors +1 -1
- last-checkpoint/global_step300/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step300/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step300/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step300/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step300/zero_pp_rank_0_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step300/zero_pp_rank_1_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step300/zero_pp_rank_2_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step300/zero_pp_rank_3_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/latest +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +93 -4
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 29034840
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b81846ac02e033886bed1edec96d0da70d2a41c811163f600e741ff029d3c0d0
|
3 |
size 29034840
|
last-checkpoint/global_step300/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9a1ea87d9a460095f8a6dfc71f41e3187bdd83627478f1fada09b55e61203a75
|
3 |
+
size 43429616
|
last-checkpoint/global_step300/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0bd0be5be8bcff8fad1ee892c578865dc476ce296b7d9997971a3bf192bcfdc7
|
3 |
+
size 43429616
|
last-checkpoint/global_step300/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7fc3f4921583b0c695b5ce4035e669191dc439f047edb565d1da43103b3ee63b
|
3 |
+
size 43429616
|
last-checkpoint/global_step300/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:75f958c4b057bf39849b81a20c3e2860e0937295d26c9a3f9a625b944a3dc343
|
3 |
+
size 43429616
|
last-checkpoint/global_step300/zero_pp_rank_0_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a4de5206222f5f6bc48ade0dd830e1dc6401703feaf7328bb17787793a784ed8
|
3 |
+
size 637299
|
last-checkpoint/global_step300/zero_pp_rank_1_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c9c15f3de1191eba8ee412eb11cf43d8297d842d97f58ff3416540e6e60132b4
|
3 |
+
size 637171
|
last-checkpoint/global_step300/zero_pp_rank_2_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b3363de2e039012fbc15be9a390355ad5f78df5709c5a29467c717bbc3d0644c
|
3 |
+
size 637171
|
last-checkpoint/global_step300/zero_pp_rank_3_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:78419129614e77d4f1d73fe2039db0059edfa88fe000e6d37b57f43d85197257
|
3 |
+
size 637171
|
last-checkpoint/latest
CHANGED
@@ -1 +1 @@
|
|
1 |
-
|
|
|
1 |
+
global_step300
|
last-checkpoint/rng_state_0.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15024
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ef002048764051a71fb00f8f978e9ec32b780dc850bdb059af362cc56494234b
|
3 |
size 15024
|
last-checkpoint/rng_state_1.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15024
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:37194a6d48612e1a46a2d5d317ead97c70d9fc4569b0118fcd5f84c3dc9daa5a
|
3 |
size 15024
|
last-checkpoint/rng_state_2.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15024
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:17c179483659a784aa1ace2427daff48c556a6bcc3c330e6f3274e4dc95e4b49
|
3 |
size 15024
|
last-checkpoint/rng_state_3.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15024
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b56857c9b117629f35af2c3d64f522d33a9d8aa94faa81ec6956380a895118c4
|
3 |
size 15024
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ad5c396c3dfcdbd96b175132537b2bcb7ae04407121e3f88285feb248284457a
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": 0.810763955116272,
|
3 |
"best_model_checkpoint": "saves/CADICA_qwenvl_direction_then_DetectAndClassify_scale6/lora/sft/checkpoint-250",
|
4 |
-
"epoch": 0.
|
5 |
"eval_steps": 50,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -452,11 +452,100 @@
|
|
452 |
"eval_steps_per_second": 0.783,
|
453 |
"num_input_tokens_seen": 2587272,
|
454 |
"step": 250
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
455 |
}
|
456 |
],
|
457 |
"logging_steps": 5,
|
458 |
"max_steps": 6770,
|
459 |
-
"num_input_tokens_seen":
|
460 |
"num_train_epochs": 2,
|
461 |
"save_steps": 50,
|
462 |
"stateful_callbacks": {
|
@@ -471,7 +560,7 @@
|
|
471 |
"attributes": {}
|
472 |
}
|
473 |
},
|
474 |
-
"total_flos":
|
475 |
"train_batch_size": 1,
|
476 |
"trial_name": null,
|
477 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": 0.810763955116272,
|
3 |
"best_model_checkpoint": "saves/CADICA_qwenvl_direction_then_DetectAndClassify_scale6/lora/sft/checkpoint-250",
|
4 |
+
"epoch": 0.08862629246676514,
|
5 |
"eval_steps": 50,
|
6 |
+
"global_step": 300,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
452 |
"eval_steps_per_second": 0.783,
|
453 |
"num_input_tokens_seen": 2587272,
|
454 |
"step": 250
|
455 |
+
},
|
456 |
+
{
|
457 |
+
"epoch": 0.07533234859675036,
|
458 |
+
"grad_norm": 2.353270620583961,
|
459 |
+
"learning_rate": 7.522123893805309e-05,
|
460 |
+
"loss": 0.7851,
|
461 |
+
"num_input_tokens_seen": 2638632,
|
462 |
+
"step": 255
|
463 |
+
},
|
464 |
+
{
|
465 |
+
"epoch": 0.07680945347119646,
|
466 |
+
"grad_norm": 3.0664271713541873,
|
467 |
+
"learning_rate": 7.669616519174043e-05,
|
468 |
+
"loss": 0.7211,
|
469 |
+
"num_input_tokens_seen": 2691016,
|
470 |
+
"step": 260
|
471 |
+
},
|
472 |
+
{
|
473 |
+
"epoch": 0.07828655834564253,
|
474 |
+
"grad_norm": 5.498034008223314,
|
475 |
+
"learning_rate": 7.817109144542774e-05,
|
476 |
+
"loss": 0.8082,
|
477 |
+
"num_input_tokens_seen": 2742912,
|
478 |
+
"step": 265
|
479 |
+
},
|
480 |
+
{
|
481 |
+
"epoch": 0.07976366322008863,
|
482 |
+
"grad_norm": 14.573947499657416,
|
483 |
+
"learning_rate": 7.964601769911504e-05,
|
484 |
+
"loss": 0.7485,
|
485 |
+
"num_input_tokens_seen": 2795264,
|
486 |
+
"step": 270
|
487 |
+
},
|
488 |
+
{
|
489 |
+
"epoch": 0.08124076809453472,
|
490 |
+
"grad_norm": 3.007807281619928,
|
491 |
+
"learning_rate": 8.112094395280237e-05,
|
492 |
+
"loss": 0.7454,
|
493 |
+
"num_input_tokens_seen": 2846344,
|
494 |
+
"step": 275
|
495 |
+
},
|
496 |
+
{
|
497 |
+
"epoch": 0.0827178729689808,
|
498 |
+
"grad_norm": 6.015750773450144,
|
499 |
+
"learning_rate": 8.259587020648968e-05,
|
500 |
+
"loss": 0.7258,
|
501 |
+
"num_input_tokens_seen": 2898304,
|
502 |
+
"step": 280
|
503 |
+
},
|
504 |
+
{
|
505 |
+
"epoch": 0.08419497784342689,
|
506 |
+
"grad_norm": 2.390238834834483,
|
507 |
+
"learning_rate": 8.4070796460177e-05,
|
508 |
+
"loss": 0.7863,
|
509 |
+
"num_input_tokens_seen": 2951368,
|
510 |
+
"step": 285
|
511 |
+
},
|
512 |
+
{
|
513 |
+
"epoch": 0.08567208271787297,
|
514 |
+
"grad_norm": 3.0216023427899357,
|
515 |
+
"learning_rate": 8.554572271386431e-05,
|
516 |
+
"loss": 0.7105,
|
517 |
+
"num_input_tokens_seen": 3003288,
|
518 |
+
"step": 290
|
519 |
+
},
|
520 |
+
{
|
521 |
+
"epoch": 0.08714918759231906,
|
522 |
+
"grad_norm": 3.255437171887138,
|
523 |
+
"learning_rate": 8.702064896755162e-05,
|
524 |
+
"loss": 0.6885,
|
525 |
+
"num_input_tokens_seen": 3054808,
|
526 |
+
"step": 295
|
527 |
+
},
|
528 |
+
{
|
529 |
+
"epoch": 0.08862629246676514,
|
530 |
+
"grad_norm": 3.506440325367033,
|
531 |
+
"learning_rate": 8.849557522123895e-05,
|
532 |
+
"loss": 0.78,
|
533 |
+
"num_input_tokens_seen": 3107200,
|
534 |
+
"step": 300
|
535 |
+
},
|
536 |
+
{
|
537 |
+
"epoch": 0.08862629246676514,
|
538 |
+
"eval_loss": 0.8194220662117004,
|
539 |
+
"eval_runtime": 19.1748,
|
540 |
+
"eval_samples_per_second": 3.129,
|
541 |
+
"eval_steps_per_second": 0.782,
|
542 |
+
"num_input_tokens_seen": 3107200,
|
543 |
+
"step": 300
|
544 |
}
|
545 |
],
|
546 |
"logging_steps": 5,
|
547 |
"max_steps": 6770,
|
548 |
+
"num_input_tokens_seen": 3107200,
|
549 |
"num_train_epochs": 2,
|
550 |
"save_steps": 50,
|
551 |
"stateful_callbacks": {
|
|
|
560 |
"attributes": {}
|
561 |
}
|
562 |
},
|
563 |
+
"total_flos": 204907697143808.0,
|
564 |
"train_batch_size": 1,
|
565 |
"trial_name": null,
|
566 |
"trial_params": null
|