Training in progress, step 300, checkpoint
Browse files- last-checkpoint/adapter_model.safetensors +1 -1
- last-checkpoint/global_step300/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step300/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step300/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step300/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step300/zero_pp_rank_0_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step300/zero_pp_rank_1_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step300/zero_pp_rank_2_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step300/zero_pp_rank_3_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/latest +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +95 -6
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 29034840
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8c6b6b2163a165d1469f79ac83287cf58b9febbee9aa4bb628b13cd978a905d2
|
3 |
size 29034840
|
last-checkpoint/global_step300/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9a7f8fb47b4fd856f28f876db8029858e01b075a9e06df2ffd855ab80a387321
|
3 |
+
size 43429616
|
last-checkpoint/global_step300/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:22bf056a4791dde098eda0a6ad1295601781e91929ca95a093f811f735f7d95b
|
3 |
+
size 43429616
|
last-checkpoint/global_step300/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6649a4d4e087da0239b97d58a774f6fe70b7e8c18097da2c986f6a5761d088e6
|
3 |
+
size 43429616
|
last-checkpoint/global_step300/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:297d7acb22e1081cf2fc6bad98f49dcbf4b535e6b990f560f28bd2873baf11dc
|
3 |
+
size 43429616
|
last-checkpoint/global_step300/zero_pp_rank_0_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a4de5206222f5f6bc48ade0dd830e1dc6401703feaf7328bb17787793a784ed8
|
3 |
+
size 637299
|
last-checkpoint/global_step300/zero_pp_rank_1_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c9c15f3de1191eba8ee412eb11cf43d8297d842d97f58ff3416540e6e60132b4
|
3 |
+
size 637171
|
last-checkpoint/global_step300/zero_pp_rank_2_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b3363de2e039012fbc15be9a390355ad5f78df5709c5a29467c717bbc3d0644c
|
3 |
+
size 637171
|
last-checkpoint/global_step300/zero_pp_rank_3_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:78419129614e77d4f1d73fe2039db0059edfa88fe000e6d37b57f43d85197257
|
3 |
+
size 637171
|
last-checkpoint/latest
CHANGED
@@ -1 +1 @@
|
|
1 |
-
|
|
|
1 |
+
global_step300
|
last-checkpoint/rng_state_0.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15024
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ef002048764051a71fb00f8f978e9ec32b780dc850bdb059af362cc56494234b
|
3 |
size 15024
|
last-checkpoint/rng_state_1.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15024
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:37194a6d48612e1a46a2d5d317ead97c70d9fc4569b0118fcd5f84c3dc9daa5a
|
3 |
size 15024
|
last-checkpoint/rng_state_2.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15024
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:17c179483659a784aa1ace2427daff48c556a6bcc3c330e6f3274e4dc95e4b49
|
3 |
size 15024
|
last-checkpoint/rng_state_3.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15024
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b56857c9b117629f35af2c3d64f522d33a9d8aa94faa81ec6956380a895118c4
|
3 |
size 15024
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:75979a52de119ed6c9cd6063ded2b7d6cb6c4870db4a05a6cc6a997d1fa9b653
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
-
"best_metric": 0.
|
3 |
-
"best_model_checkpoint": "saves/CADICA_qwenvl_direction_scale4/lora/sft/checkpoint-
|
4 |
-
"epoch": 0.
|
5 |
"eval_steps": 50,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -452,11 +452,100 @@
|
|
452 |
"eval_steps_per_second": 0.769,
|
453 |
"num_input_tokens_seen": 2496000,
|
454 |
"step": 250
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
455 |
}
|
456 |
],
|
457 |
"logging_steps": 5,
|
458 |
"max_steps": 3400,
|
459 |
-
"num_input_tokens_seen":
|
460 |
"num_train_epochs": 2,
|
461 |
"save_steps": 50,
|
462 |
"stateful_callbacks": {
|
@@ -471,7 +560,7 @@
|
|
471 |
"attributes": {}
|
472 |
}
|
473 |
},
|
474 |
-
"total_flos":
|
475 |
"train_batch_size": 1,
|
476 |
"trial_name": null,
|
477 |
"trial_params": null
|
|
|
1 |
{
|
2 |
+
"best_metric": 0.030908752232789993,
|
3 |
+
"best_model_checkpoint": "saves/CADICA_qwenvl_direction_scale4/lora/sft/checkpoint-300",
|
4 |
+
"epoch": 0.1545197012619109,
|
5 |
"eval_steps": 50,
|
6 |
+
"global_step": 300,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
452 |
"eval_steps_per_second": 0.769,
|
453 |
"num_input_tokens_seen": 2496000,
|
454 |
"step": 250
|
455 |
+
},
|
456 |
+
{
|
457 |
+
"epoch": 0.13134174607262425,
|
458 |
+
"grad_norm": 0.845514751132877,
|
459 |
+
"learning_rate": 9.98292246503335e-05,
|
460 |
+
"loss": 0.0235,
|
461 |
+
"num_input_tokens_seen": 2545920,
|
462 |
+
"step": 255
|
463 |
+
},
|
464 |
+
{
|
465 |
+
"epoch": 0.13391707442698944,
|
466 |
+
"grad_norm": 1.7441538057875345,
|
467 |
+
"learning_rate": 9.980855572238714e-05,
|
468 |
+
"loss": 0.0137,
|
469 |
+
"num_input_tokens_seen": 2595840,
|
470 |
+
"step": 260
|
471 |
+
},
|
472 |
+
{
|
473 |
+
"epoch": 0.13649240278135463,
|
474 |
+
"grad_norm": 0.6628270146260998,
|
475 |
+
"learning_rate": 9.978670881475172e-05,
|
476 |
+
"loss": 0.0605,
|
477 |
+
"num_input_tokens_seen": 2645760,
|
478 |
+
"step": 265
|
479 |
+
},
|
480 |
+
{
|
481 |
+
"epoch": 0.1390677311357198,
|
482 |
+
"grad_norm": 4.49926360488578,
|
483 |
+
"learning_rate": 9.976368444410985e-05,
|
484 |
+
"loss": 0.0585,
|
485 |
+
"num_input_tokens_seen": 2695680,
|
486 |
+
"step": 270
|
487 |
+
},
|
488 |
+
{
|
489 |
+
"epoch": 0.141643059490085,
|
490 |
+
"grad_norm": 0.7159498856832776,
|
491 |
+
"learning_rate": 9.973948315499126e-05,
|
492 |
+
"loss": 0.0096,
|
493 |
+
"num_input_tokens_seen": 2745600,
|
494 |
+
"step": 275
|
495 |
+
},
|
496 |
+
{
|
497 |
+
"epoch": 0.14421838784445018,
|
498 |
+
"grad_norm": 4.09842502301765,
|
499 |
+
"learning_rate": 9.971410551976002e-05,
|
500 |
+
"loss": 0.1383,
|
501 |
+
"num_input_tokens_seen": 2795520,
|
502 |
+
"step": 280
|
503 |
+
},
|
504 |
+
{
|
505 |
+
"epoch": 0.14679371619881534,
|
506 |
+
"grad_norm": 1.0773897808674016,
|
507 |
+
"learning_rate": 9.968755213860094e-05,
|
508 |
+
"loss": 0.049,
|
509 |
+
"num_input_tokens_seen": 2845440,
|
510 |
+
"step": 285
|
511 |
+
},
|
512 |
+
{
|
513 |
+
"epoch": 0.14936904455318054,
|
514 |
+
"grad_norm": 1.7224091076306651,
|
515 |
+
"learning_rate": 9.96598236395054e-05,
|
516 |
+
"loss": 0.0336,
|
517 |
+
"num_input_tokens_seen": 2895360,
|
518 |
+
"step": 290
|
519 |
+
},
|
520 |
+
{
|
521 |
+
"epoch": 0.1519443729075457,
|
522 |
+
"grad_norm": 0.24158350585209873,
|
523 |
+
"learning_rate": 9.96309206782565e-05,
|
524 |
+
"loss": 0.019,
|
525 |
+
"num_input_tokens_seen": 2945280,
|
526 |
+
"step": 295
|
527 |
+
},
|
528 |
+
{
|
529 |
+
"epoch": 0.1545197012619109,
|
530 |
+
"grad_norm": 0.6614271496466305,
|
531 |
+
"learning_rate": 9.960084393841355e-05,
|
532 |
+
"loss": 0.0381,
|
533 |
+
"num_input_tokens_seen": 2995200,
|
534 |
+
"step": 300
|
535 |
+
},
|
536 |
+
{
|
537 |
+
"epoch": 0.1545197012619109,
|
538 |
+
"eval_loss": 0.030908752232789993,
|
539 |
+
"eval_runtime": 19.8702,
|
540 |
+
"eval_samples_per_second": 3.02,
|
541 |
+
"eval_steps_per_second": 0.755,
|
542 |
+
"num_input_tokens_seen": 2995200,
|
543 |
+
"step": 300
|
544 |
}
|
545 |
],
|
546 |
"logging_steps": 5,
|
547 |
"max_steps": 3400,
|
548 |
+
"num_input_tokens_seen": 2995200,
|
549 |
"num_train_epochs": 2,
|
550 |
"save_steps": 50,
|
551 |
"stateful_callbacks": {
|
|
|
560 |
"attributes": {}
|
561 |
}
|
562 |
},
|
563 |
+
"total_flos": 197584705814528.0,
|
564 |
"train_batch_size": 1,
|
565 |
"trial_name": null,
|
566 |
"trial_params": null
|