Training in progress, step 300, checkpoint
Browse files- last-checkpoint/adapter_model.safetensors +1 -1
- last-checkpoint/global_step300/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step300/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step300/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step300/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step300/zero_pp_rank_0_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step300/zero_pp_rank_1_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step300/zero_pp_rank_2_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step300/zero_pp_rank_3_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/latest +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +95 -6
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 18516456
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b99b8b6736c9551b7572fb855984029765a2e7449d447f3321e71447af523fc8
|
3 |
size 18516456
|
last-checkpoint/global_step300/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cb2fe97b72b18e1b06d790cd0048ed44a05a1e5a39d4ef283b3835be894019fc
|
3 |
+
size 27700976
|
last-checkpoint/global_step300/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:33551976b42dd9fe9ff7141cf66ca30bcc7ede611d7f697fa57a7b782a8dd060
|
3 |
+
size 27700976
|
last-checkpoint/global_step300/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:02d240351e1d36e8d3995c129c11871b7fe39972e02236eef7f1b586ccda5015
|
3 |
+
size 27700976
|
last-checkpoint/global_step300/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f9fe797e05e77dff6b139e49ef3269ae6a923d455e5b82b1a9324bbdb7b9dd0f
|
3 |
+
size 27700976
|
last-checkpoint/global_step300/zero_pp_rank_0_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:466d2bd70556de01e77927d489b8b824cae46eb687bd72a33613389ace256129
|
3 |
+
size 411571
|
last-checkpoint/global_step300/zero_pp_rank_1_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a4462a91000f87f6f68f470477a3b4b4797055c31343e143a43b66f698e7b122
|
3 |
+
size 411507
|
last-checkpoint/global_step300/zero_pp_rank_2_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b93f71e7375d5c7fea0e0456caa855df4c11e4823976f294fe192bdb5f785651
|
3 |
+
size 411507
|
last-checkpoint/global_step300/zero_pp_rank_3_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:14f0fae20515bd14a4275cbf85eb4db5b258c692fc299f9a68e861c8cf095e39
|
3 |
+
size 411507
|
last-checkpoint/latest
CHANGED
@@ -1 +1 @@
|
|
1 |
-
|
|
|
1 |
+
global_step300
|
last-checkpoint/rng_state_0.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15024
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ef002048764051a71fb00f8f978e9ec32b780dc850bdb059af362cc56494234b
|
3 |
size 15024
|
last-checkpoint/rng_state_1.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15024
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:37194a6d48612e1a46a2d5d317ead97c70d9fc4569b0118fcd5f84c3dc9daa5a
|
3 |
size 15024
|
last-checkpoint/rng_state_2.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15024
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:17c179483659a784aa1ace2427daff48c556a6bcc3c330e6f3274e4dc95e4b49
|
3 |
size 15024
|
last-checkpoint/rng_state_3.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15024
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b56857c9b117629f35af2c3d64f522d33a9d8aa94faa81ec6956380a895118c4
|
3 |
size 15024
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:75979a52de119ed6c9cd6063ded2b7d6cb6c4870db4a05a6cc6a997d1fa9b653
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
-
"best_metric": 0.
|
3 |
-
"best_model_checkpoint": "saves/CADICA_qwenvl_stenosis_classily_scale4_frozenVision/lora/sft/checkpoint-
|
4 |
-
"epoch": 0.
|
5 |
"eval_steps": 50,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -452,11 +452,100 @@
|
|
452 |
"eval_steps_per_second": 0.858,
|
453 |
"num_input_tokens_seen": 2924016,
|
454 |
"step": 250
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
455 |
}
|
456 |
],
|
457 |
"logging_steps": 5,
|
458 |
"max_steps": 3400,
|
459 |
-
"num_input_tokens_seen":
|
460 |
"num_train_epochs": 2,
|
461 |
"save_steps": 50,
|
462 |
"stateful_callbacks": {
|
@@ -471,7 +560,7 @@
|
|
471 |
"attributes": {}
|
472 |
}
|
473 |
},
|
474 |
-
"total_flos":
|
475 |
"train_batch_size": 1,
|
476 |
"trial_name": null,
|
477 |
"trial_params": null
|
|
|
1 |
{
|
2 |
+
"best_metric": 0.8908902406692505,
|
3 |
+
"best_model_checkpoint": "saves/CADICA_qwenvl_stenosis_classily_scale4_frozenVision/lora/sft/checkpoint-300",
|
4 |
+
"epoch": 0.1545197012619109,
|
5 |
"eval_steps": 50,
|
6 |
+
"global_step": 300,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
452 |
"eval_steps_per_second": 0.858,
|
453 |
"num_input_tokens_seen": 2924016,
|
454 |
"step": 250
|
455 |
+
},
|
456 |
+
{
|
457 |
+
"epoch": 0.13134174607262425,
|
458 |
+
"grad_norm": 0.3762164361984238,
|
459 |
+
"learning_rate": 9.98292246503335e-05,
|
460 |
+
"loss": 0.8987,
|
461 |
+
"num_input_tokens_seen": 2982520,
|
462 |
+
"step": 255
|
463 |
+
},
|
464 |
+
{
|
465 |
+
"epoch": 0.13391707442698944,
|
466 |
+
"grad_norm": 0.6447043002410199,
|
467 |
+
"learning_rate": 9.980855572238714e-05,
|
468 |
+
"loss": 0.9036,
|
469 |
+
"num_input_tokens_seen": 3041008,
|
470 |
+
"step": 260
|
471 |
+
},
|
472 |
+
{
|
473 |
+
"epoch": 0.13649240278135463,
|
474 |
+
"grad_norm": 0.5308092769971742,
|
475 |
+
"learning_rate": 9.978670881475172e-05,
|
476 |
+
"loss": 0.8961,
|
477 |
+
"num_input_tokens_seen": 3099464,
|
478 |
+
"step": 265
|
479 |
+
},
|
480 |
+
{
|
481 |
+
"epoch": 0.1390677311357198,
|
482 |
+
"grad_norm": 0.508333330469703,
|
483 |
+
"learning_rate": 9.976368444410985e-05,
|
484 |
+
"loss": 0.9012,
|
485 |
+
"num_input_tokens_seen": 3157944,
|
486 |
+
"step": 270
|
487 |
+
},
|
488 |
+
{
|
489 |
+
"epoch": 0.141643059490085,
|
490 |
+
"grad_norm": 0.6801788563719119,
|
491 |
+
"learning_rate": 9.973948315499126e-05,
|
492 |
+
"loss": 0.8985,
|
493 |
+
"num_input_tokens_seen": 3216448,
|
494 |
+
"step": 275
|
495 |
+
},
|
496 |
+
{
|
497 |
+
"epoch": 0.14421838784445018,
|
498 |
+
"grad_norm": 0.6933074703933572,
|
499 |
+
"learning_rate": 9.971410551976002e-05,
|
500 |
+
"loss": 0.9114,
|
501 |
+
"num_input_tokens_seen": 3274928,
|
502 |
+
"step": 280
|
503 |
+
},
|
504 |
+
{
|
505 |
+
"epoch": 0.14679371619881534,
|
506 |
+
"grad_norm": 0.21208820897494882,
|
507 |
+
"learning_rate": 9.968755213860094e-05,
|
508 |
+
"loss": 0.8886,
|
509 |
+
"num_input_tokens_seen": 3333408,
|
510 |
+
"step": 285
|
511 |
+
},
|
512 |
+
{
|
513 |
+
"epoch": 0.14936904455318054,
|
514 |
+
"grad_norm": 0.5791422669000065,
|
515 |
+
"learning_rate": 9.96598236395054e-05,
|
516 |
+
"loss": 0.8929,
|
517 |
+
"num_input_tokens_seen": 3391896,
|
518 |
+
"step": 290
|
519 |
+
},
|
520 |
+
{
|
521 |
+
"epoch": 0.1519443729075457,
|
522 |
+
"grad_norm": 0.3460368893191152,
|
523 |
+
"learning_rate": 9.96309206782565e-05,
|
524 |
+
"loss": 0.9091,
|
525 |
+
"num_input_tokens_seen": 3450392,
|
526 |
+
"step": 295
|
527 |
+
},
|
528 |
+
{
|
529 |
+
"epoch": 0.1545197012619109,
|
530 |
+
"grad_norm": 0.22425222135997747,
|
531 |
+
"learning_rate": 9.960084393841355e-05,
|
532 |
+
"loss": 0.8893,
|
533 |
+
"num_input_tokens_seen": 3508888,
|
534 |
+
"step": 300
|
535 |
+
},
|
536 |
+
{
|
537 |
+
"epoch": 0.1545197012619109,
|
538 |
+
"eval_loss": 0.8908902406692505,
|
539 |
+
"eval_runtime": 16.9521,
|
540 |
+
"eval_samples_per_second": 3.539,
|
541 |
+
"eval_steps_per_second": 0.885,
|
542 |
+
"num_input_tokens_seen": 3508888,
|
543 |
+
"step": 300
|
544 |
}
|
545 |
],
|
546 |
"logging_steps": 5,
|
547 |
"max_steps": 3400,
|
548 |
+
"num_input_tokens_seen": 3508888,
|
549 |
"num_train_epochs": 2,
|
550 |
"save_steps": 50,
|
551 |
"stateful_callbacks": {
|
|
|
560 |
"attributes": {}
|
561 |
}
|
562 |
},
|
563 |
+
"total_flos": 196997011734528.0,
|
564 |
"train_batch_size": 1,
|
565 |
"trial_name": null,
|
566 |
"trial_params": null
|