Training in progress, step 3500, checkpoint
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:869559a5ae5ad7f1c26df10072eb69150c395ca8de50790bd49b4fe2680e9d2a
 size 891558696
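The new model.safetensors holds the fine-tuned weights at step 3500. A minimal sketch for inspecting it once the checkpoint has been pulled locally (the LFS pointer above must be resolved to the actual 891558696-byte file, and the safetensors and torch packages are assumed to be installed):

```python
from safetensors.torch import load_file

# Load the checkpoint weights as a plain dict of tensor name -> torch.Tensor.
state_dict = load_file("last-checkpoint/model.safetensors", device="cpu")

total_params = sum(t.numel() for t in state_dict.values())
print(f"{len(state_dict)} tensors, {total_params:,} parameters")
```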
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:209a99db0577027fa2c361d3c3c432d63c3657ce2b73bf2d7a0cc3bbdd3a0773
 size 1783272762
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:f8441833b2ccddbdbfb81a5b41b410f598dfb2796bdfdf3689c4bebcaf3da6bb
 size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:74b133c003556814e78a8921a1c25cb078726e4bad98aa7a030c1c78956cb745
 size 1064
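All four files above are stored as Git LFS pointers, so the diffs only record new sha256 oids; the file sizes are unchanged. A local copy can be checked against a pointer with nothing but the standard library. The sketch below uses the model.safetensors oid and size from this commit; matches_lfs_pointer is a hypothetical helper name:

```python
import hashlib

def matches_lfs_pointer(path: str, expected_oid: str, expected_size: int) -> bool:
    """Compare a local file against the oid/size recorded in its Git LFS pointer."""
    digest = hashlib.sha256()
    size = 0
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            digest.update(chunk)
            size += len(chunk)
    return digest.hexdigest() == expected_oid and size == expected_size

# oid and size copied from the model.safetensors pointer in this commit.
print(matches_lfs_pointer(
    "last-checkpoint/model.safetensors",
    "869559a5ae5ad7f1c26df10072eb69150c395ca8de50790bd49b4fe2680e9d2a",
    891558696,
))
```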
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
-  "best_metric": 0.
-  "best_model_checkpoint": "./fine-tuned/checkpoint-
-  "epoch": 0.
+  "best_metric": 0.11222900450229645,
+  "best_model_checkpoint": "./fine-tuned/checkpoint-3500",
+  "epoch": 0.6147361025731097,
   "eval_steps": 100,
-  "global_step":
+  "global_step": 3500,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -667,6 +667,116 @@
       "eval_samples_per_second": 25.455,
       "eval_steps_per_second": 3.185,
       "step": 3000
+    },
+    {
+      "epoch": 0.5356986036708528,
+      "grad_norm": 9072.705078125,
+      "learning_rate": 2.5981907605831725e-05,
+      "loss": 0.1249,
+      "step": 3050
+    },
+    {
+      "epoch": 0.5444805479933257,
+      "grad_norm": 11936.0400390625,
+      "learning_rate": 2.591603723871421e-05,
+      "loss": 0.1205,
+      "step": 3100
+    },
+    {
+      "epoch": 0.5444805479933257,
+      "eval_loss": 0.11320458352565765,
+      "eval_runtime": 174.8633,
+      "eval_samples_per_second": 25.506,
+      "eval_steps_per_second": 3.191,
+      "step": 3100
+    },
+    {
+      "epoch": 0.5532624923157987,
+      "grad_norm": 11488.6748046875,
+      "learning_rate": 2.58501668715967e-05,
+      "loss": 0.1202,
+      "step": 3150
+    },
+    {
+      "epoch": 0.5620444366382717,
+      "grad_norm": 12126.583984375,
+      "learning_rate": 2.5784296504479185e-05,
+      "loss": 0.12,
+      "step": 3200
+    },
+    {
+      "epoch": 0.5620444366382717,
+      "eval_loss": 0.11316446959972382,
+      "eval_runtime": 174.862,
+      "eval_samples_per_second": 25.506,
+      "eval_steps_per_second": 3.191,
+      "step": 3200
+    },
+    {
+      "epoch": 0.5708263809607447,
+      "grad_norm": 14138.3876953125,
+      "learning_rate": 2.5718426137361675e-05,
+      "loss": 0.1272,
+      "step": 3250
+    },
+    {
+      "epoch": 0.5796083252832177,
+      "grad_norm": 6703.96728515625,
+      "learning_rate": 2.5652555770244158e-05,
+      "loss": 0.1245,
+      "step": 3300
+    },
+    {
+      "epoch": 0.5796083252832177,
+      "eval_loss": 0.11248895525932312,
+      "eval_runtime": 175.0938,
+      "eval_samples_per_second": 25.472,
+      "eval_steps_per_second": 3.187,
+      "step": 3300
+    },
+    {
+      "epoch": 0.5883902696056907,
+      "grad_norm": 14042.6962890625,
+      "learning_rate": 2.5586685403126648e-05,
+      "loss": 0.1135,
+      "step": 3350
+    },
+    {
+      "epoch": 0.5971722139281637,
+      "grad_norm": 11223.4375,
+      "learning_rate": 2.5520815036009135e-05,
+      "loss": 0.1192,
+      "step": 3400
+    },
+    {
+      "epoch": 0.5971722139281637,
+      "eval_loss": 0.11230960488319397,
+      "eval_runtime": 175.0029,
+      "eval_samples_per_second": 25.485,
+      "eval_steps_per_second": 3.189,
+      "step": 3400
+    },
+    {
+      "epoch": 0.6059541582506367,
+      "grad_norm": 10186.056640625,
+      "learning_rate": 2.5454944668891625e-05,
+      "loss": 0.1133,
+      "step": 3450
+    },
+    {
+      "epoch": 0.6147361025731097,
+      "grad_norm": 11724.2939453125,
+      "learning_rate": 2.5389074301774108e-05,
+      "loss": 0.1191,
+      "step": 3500
+    },
+    {
+      "epoch": 0.6147361025731097,
+      "eval_loss": 0.11222900450229645,
+      "eval_runtime": 174.8917,
+      "eval_samples_per_second": 25.501,
+      "eval_steps_per_second": 3.191,
+      "step": 3500
     }
   ],
   "logging_steps": 50,
@@ -686,7 +796,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 1.
+  "total_flos": 1.705082093568e+16,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null
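The additions to trainer_state.json carry the actual training signal for steps 3050 through 3500: training loss stays in the 0.11 to 0.13 range, eval_loss improves from 0.11320 at step 3100 to 0.11223 at step 3500, and best_model_checkpoint accordingly moves to ./fine-tuned/checkpoint-3500. A minimal sketch for reading those numbers back out of the checkpoint, assuming the standard Hugging Face Trainer layout where these entries sit under the log_history list:

```python
import json

with open("last-checkpoint/trainer_state.json") as f:
    state = json.load(f)

# Rows containing "eval_loss" are the evaluations run every 100 steps (eval_steps).
evals = [entry for entry in state["log_history"] if "eval_loss" in entry]
for entry in evals[-5:]:
    print(f"step {entry['step']:>5}  eval_loss {entry['eval_loss']:.5f}")

best = min(evals, key=lambda entry: entry["eval_loss"])
print("best:", best["step"], best["eval_loss"])  # should agree with best_metric above
```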