marinone94
commited on
Commit
β’
fc2f4cd
1
Parent(s):
6012db9
Training in progress, step 2700
Browse files- {checkpoint-1500 β checkpoint-2600}/config.json +0 -0
- {checkpoint-1500 β checkpoint-2600}/optimizer.pt +1 -1
- {checkpoint-1500 β checkpoint-2600}/preprocessor_config.json +0 -0
- {checkpoint-1600 β checkpoint-2600}/pytorch_model.bin +1 -1
- {checkpoint-1600 β checkpoint-2600}/rng_state.pth +1 -1
- {checkpoint-1600 β checkpoint-2600}/scaler.pt +1 -1
- {checkpoint-1500 β checkpoint-2600}/scheduler.pt +1 -1
- {checkpoint-1600 β checkpoint-2600}/trainer_state.json +393 -3
- {checkpoint-1500 β checkpoint-2600}/training_args.bin +0 -0
- {checkpoint-1600 β checkpoint-2700}/config.json +0 -0
- {checkpoint-1600 β checkpoint-2700}/optimizer.pt +1 -1
- {checkpoint-1600 β checkpoint-2700}/preprocessor_config.json +0 -0
- {checkpoint-1500 β checkpoint-2700}/pytorch_model.bin +1 -1
- {checkpoint-1500 β checkpoint-2700}/rng_state.pth +2 -2
- {checkpoint-1500 β checkpoint-2700}/scaler.pt +1 -1
- {checkpoint-1600 β checkpoint-2700}/scheduler.pt +1 -1
- {checkpoint-1500 β checkpoint-2700}/trainer_state.json +471 -3
- {checkpoint-1600 β checkpoint-2700}/training_args.bin +0 -0
{checkpoint-1500 β checkpoint-2600}/config.json
RENAMED
File without changes
|
{checkpoint-1500 β checkpoint-2600}/optimizer.pt
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 2490337809
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:446a2b6be86378473522da6f8340e8995c1f93f317dbf12dd45b200fe2a7cc10
|
3 |
size 2490337809
|
{checkpoint-1500 β checkpoint-2600}/preprocessor_config.json
RENAMED
File without changes
|
{checkpoint-1600 β checkpoint-2600}/pytorch_model.bin
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1262063089
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:91a8acf504932720b905a27cdf2f6f7fe41c15519686ef918a30514dacbc0cee
|
3 |
size 1262063089
|
{checkpoint-1600 β checkpoint-2600}/rng_state.pth
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14567
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c9452e603549f04f4903c5fb527b7bfc83e1fbaca4162bb7fdd612e22dd24235
|
3 |
size 14567
|
{checkpoint-1600 β checkpoint-2600}/scaler.pt
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 559
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:981227375b8c6d2439b9fd2664e9cd784500649faa2e607c97ed38e07fc17be3
|
3 |
size 559
|
{checkpoint-1500 β checkpoint-2600}/scheduler.pt
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 623
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7e6505acc346b949554e5cbd939e1a9cf4a757843e611b8b1dc1e36690134723
|
3 |
size 623
|
{checkpoint-1600 β checkpoint-2600}/trainer_state.json
RENAMED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch":
|
5 |
-
"global_step":
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
@@ -630,11 +630,401 @@
|
|
630 |
"eval_steps_per_second": 0.795,
|
631 |
"eval_wer": 0.17885325007096226,
|
632 |
"step": 1600
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
633 |
}
|
634 |
],
|
635 |
"max_steps": 4550,
|
636 |
"num_train_epochs": 50,
|
637 |
-
"total_flos":
|
638 |
"trial_name": null,
|
639 |
"trial_params": null
|
640 |
}
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 28.56830601092896,
|
5 |
+
"global_step": 2600,
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
|
|
630 |
"eval_steps_per_second": 0.795,
|
631 |
"eval_wer": 0.17885325007096226,
|
632 |
"step": 1600
|
633 |
+
},
|
634 |
+
{
|
635 |
+
"epoch": 17.8,
|
636 |
+
"learning_rate": 0.00020250000000000002,
|
637 |
+
"loss": 1.0322,
|
638 |
+
"step": 1620
|
639 |
+
},
|
640 |
+
{
|
641 |
+
"epoch": 18.02,
|
642 |
+
"learning_rate": 0.000205,
|
643 |
+
"loss": 1.0176,
|
644 |
+
"step": 1640
|
645 |
+
},
|
646 |
+
{
|
647 |
+
"epoch": 18.24,
|
648 |
+
"learning_rate": 0.0002075,
|
649 |
+
"loss": 1.0272,
|
650 |
+
"step": 1660
|
651 |
+
},
|
652 |
+
{
|
653 |
+
"epoch": 18.46,
|
654 |
+
"learning_rate": 0.00021,
|
655 |
+
"loss": 0.9675,
|
656 |
+
"step": 1680
|
657 |
+
},
|
658 |
+
{
|
659 |
+
"epoch": 18.68,
|
660 |
+
"learning_rate": 0.0002125,
|
661 |
+
"loss": 0.9816,
|
662 |
+
"step": 1700
|
663 |
+
},
|
664 |
+
{
|
665 |
+
"epoch": 18.68,
|
666 |
+
"eval_loss": 0.19402356445789337,
|
667 |
+
"eval_runtime": 194.75,
|
668 |
+
"eval_samples_per_second": 24.868,
|
669 |
+
"eval_steps_per_second": 0.78,
|
670 |
+
"eval_wer": 0.18010218563724098,
|
671 |
+
"step": 1700
|
672 |
+
},
|
673 |
+
{
|
674 |
+
"epoch": 18.9,
|
675 |
+
"learning_rate": 0.000215,
|
676 |
+
"loss": 1.0023,
|
677 |
+
"step": 1720
|
678 |
+
},
|
679 |
+
{
|
680 |
+
"epoch": 19.12,
|
681 |
+
"learning_rate": 0.0002175,
|
682 |
+
"loss": 1.013,
|
683 |
+
"step": 1740
|
684 |
+
},
|
685 |
+
{
|
686 |
+
"epoch": 19.34,
|
687 |
+
"learning_rate": 0.00022,
|
688 |
+
"loss": 0.9664,
|
689 |
+
"step": 1760
|
690 |
+
},
|
691 |
+
{
|
692 |
+
"epoch": 19.56,
|
693 |
+
"learning_rate": 0.00022250000000000001,
|
694 |
+
"loss": 0.9736,
|
695 |
+
"step": 1780
|
696 |
+
},
|
697 |
+
{
|
698 |
+
"epoch": 19.78,
|
699 |
+
"learning_rate": 0.00022500000000000002,
|
700 |
+
"loss": 0.9814,
|
701 |
+
"step": 1800
|
702 |
+
},
|
703 |
+
{
|
704 |
+
"epoch": 19.78,
|
705 |
+
"eval_loss": 0.18596723675727844,
|
706 |
+
"eval_runtime": 188.4639,
|
707 |
+
"eval_samples_per_second": 25.697,
|
708 |
+
"eval_steps_per_second": 0.807,
|
709 |
+
"eval_wer": 0.16667612829974454,
|
710 |
+
"step": 1800
|
711 |
+
},
|
712 |
+
{
|
713 |
+
"epoch": 19.99,
|
714 |
+
"learning_rate": 0.0002275,
|
715 |
+
"loss": 1.0064,
|
716 |
+
"step": 1820
|
717 |
+
},
|
718 |
+
{
|
719 |
+
"epoch": 20.22,
|
720 |
+
"learning_rate": 0.00023,
|
721 |
+
"loss": 0.9583,
|
722 |
+
"step": 1840
|
723 |
+
},
|
724 |
+
{
|
725 |
+
"epoch": 20.44,
|
726 |
+
"learning_rate": 0.0002325,
|
727 |
+
"loss": 0.9646,
|
728 |
+
"step": 1860
|
729 |
+
},
|
730 |
+
{
|
731 |
+
"epoch": 20.66,
|
732 |
+
"learning_rate": 0.000235,
|
733 |
+
"loss": 0.9762,
|
734 |
+
"step": 1880
|
735 |
+
},
|
736 |
+
{
|
737 |
+
"epoch": 20.87,
|
738 |
+
"learning_rate": 0.0002375,
|
739 |
+
"loss": 0.9787,
|
740 |
+
"step": 1900
|
741 |
+
},
|
742 |
+
{
|
743 |
+
"epoch": 20.87,
|
744 |
+
"eval_loss": 0.18878202140331268,
|
745 |
+
"eval_runtime": 188.244,
|
746 |
+
"eval_samples_per_second": 25.727,
|
747 |
+
"eval_steps_per_second": 0.807,
|
748 |
+
"eval_wer": 0.16420664206642066,
|
749 |
+
"step": 1900
|
750 |
+
},
|
751 |
+
{
|
752 |
+
"epoch": 21.1,
|
753 |
+
"learning_rate": 0.00024,
|
754 |
+
"loss": 1.0218,
|
755 |
+
"step": 1920
|
756 |
+
},
|
757 |
+
{
|
758 |
+
"epoch": 21.32,
|
759 |
+
"learning_rate": 0.00024249999999999999,
|
760 |
+
"loss": 0.9505,
|
761 |
+
"step": 1940
|
762 |
+
},
|
763 |
+
{
|
764 |
+
"epoch": 21.54,
|
765 |
+
"learning_rate": 0.000245,
|
766 |
+
"loss": 0.9554,
|
767 |
+
"step": 1960
|
768 |
+
},
|
769 |
+
{
|
770 |
+
"epoch": 21.75,
|
771 |
+
"learning_rate": 0.0002475,
|
772 |
+
"loss": 0.9728,
|
773 |
+
"step": 1980
|
774 |
+
},
|
775 |
+
{
|
776 |
+
"epoch": 21.97,
|
777 |
+
"learning_rate": 0.00025,
|
778 |
+
"loss": 0.9699,
|
779 |
+
"step": 2000
|
780 |
+
},
|
781 |
+
{
|
782 |
+
"epoch": 21.97,
|
783 |
+
"eval_loss": 0.18748582899570465,
|
784 |
+
"eval_runtime": 190.7875,
|
785 |
+
"eval_samples_per_second": 25.384,
|
786 |
+
"eval_steps_per_second": 0.797,
|
787 |
+
"eval_wer": 0.17042293499858074,
|
788 |
+
"step": 2000
|
789 |
+
},
|
790 |
+
{
|
791 |
+
"epoch": 22.2,
|
792 |
+
"learning_rate": 0.00024803921568627453,
|
793 |
+
"loss": 0.9624,
|
794 |
+
"step": 2020
|
795 |
+
},
|
796 |
+
{
|
797 |
+
"epoch": 22.42,
|
798 |
+
"learning_rate": 0.000246078431372549,
|
799 |
+
"loss": 0.9419,
|
800 |
+
"step": 2040
|
801 |
+
},
|
802 |
+
{
|
803 |
+
"epoch": 22.63,
|
804 |
+
"learning_rate": 0.00024411764705882354,
|
805 |
+
"loss": 0.9563,
|
806 |
+
"step": 2060
|
807 |
+
},
|
808 |
+
{
|
809 |
+
"epoch": 22.85,
|
810 |
+
"learning_rate": 0.00024215686274509804,
|
811 |
+
"loss": 0.9643,
|
812 |
+
"step": 2080
|
813 |
+
},
|
814 |
+
{
|
815 |
+
"epoch": 23.08,
|
816 |
+
"learning_rate": 0.00024019607843137256,
|
817 |
+
"loss": 0.9616,
|
818 |
+
"step": 2100
|
819 |
+
},
|
820 |
+
{
|
821 |
+
"epoch": 23.08,
|
822 |
+
"eval_loss": 0.18017500638961792,
|
823 |
+
"eval_runtime": 191.3932,
|
824 |
+
"eval_samples_per_second": 25.304,
|
825 |
+
"eval_steps_per_second": 0.794,
|
826 |
+
"eval_wer": 0.16173715583309678,
|
827 |
+
"step": 2100
|
828 |
+
},
|
829 |
+
{
|
830 |
+
"epoch": 23.3,
|
831 |
+
"learning_rate": 0.00023823529411764704,
|
832 |
+
"loss": 0.917,
|
833 |
+
"step": 2120
|
834 |
+
},
|
835 |
+
{
|
836 |
+
"epoch": 23.51,
|
837 |
+
"learning_rate": 0.00023627450980392157,
|
838 |
+
"loss": 0.945,
|
839 |
+
"step": 2140
|
840 |
+
},
|
841 |
+
{
|
842 |
+
"epoch": 23.73,
|
843 |
+
"learning_rate": 0.0002343137254901961,
|
844 |
+
"loss": 0.9243,
|
845 |
+
"step": 2160
|
846 |
+
},
|
847 |
+
{
|
848 |
+
"epoch": 23.95,
|
849 |
+
"learning_rate": 0.0002323529411764706,
|
850 |
+
"loss": 0.9288,
|
851 |
+
"step": 2180
|
852 |
+
},
|
853 |
+
{
|
854 |
+
"epoch": 24.17,
|
855 |
+
"learning_rate": 0.0002303921568627451,
|
856 |
+
"loss": 0.9378,
|
857 |
+
"step": 2200
|
858 |
+
},
|
859 |
+
{
|
860 |
+
"epoch": 24.17,
|
861 |
+
"eval_loss": 0.17928896844387054,
|
862 |
+
"eval_runtime": 189.7619,
|
863 |
+
"eval_samples_per_second": 25.521,
|
864 |
+
"eval_steps_per_second": 0.801,
|
865 |
+
"eval_wer": 0.1577348850411581,
|
866 |
+
"step": 2200
|
867 |
+
},
|
868 |
+
{
|
869 |
+
"epoch": 24.39,
|
870 |
+
"learning_rate": 0.0002284313725490196,
|
871 |
+
"loss": 0.9071,
|
872 |
+
"step": 2220
|
873 |
+
},
|
874 |
+
{
|
875 |
+
"epoch": 24.61,
|
876 |
+
"learning_rate": 0.00022647058823529412,
|
877 |
+
"loss": 0.9054,
|
878 |
+
"step": 2240
|
879 |
+
},
|
880 |
+
{
|
881 |
+
"epoch": 24.83,
|
882 |
+
"learning_rate": 0.0002246078431372549,
|
883 |
+
"loss": 0.9303,
|
884 |
+
"step": 2260
|
885 |
+
},
|
886 |
+
{
|
887 |
+
"epoch": 25.05,
|
888 |
+
"learning_rate": 0.00022264705882352943,
|
889 |
+
"loss": 0.9376,
|
890 |
+
"step": 2280
|
891 |
+
},
|
892 |
+
{
|
893 |
+
"epoch": 25.27,
|
894 |
+
"learning_rate": 0.0002206862745098039,
|
895 |
+
"loss": 0.888,
|
896 |
+
"step": 2300
|
897 |
+
},
|
898 |
+
{
|
899 |
+
"epoch": 25.27,
|
900 |
+
"eval_loss": 0.17642559111118317,
|
901 |
+
"eval_runtime": 187.0437,
|
902 |
+
"eval_samples_per_second": 25.892,
|
903 |
+
"eval_steps_per_second": 0.813,
|
904 |
+
"eval_wer": 0.15452739142776042,
|
905 |
+
"step": 2300
|
906 |
+
},
|
907 |
+
{
|
908 |
+
"epoch": 25.49,
|
909 |
+
"learning_rate": 0.00021872549019607843,
|
910 |
+
"loss": 0.9135,
|
911 |
+
"step": 2320
|
912 |
+
},
|
913 |
+
{
|
914 |
+
"epoch": 25.71,
|
915 |
+
"learning_rate": 0.00021676470588235294,
|
916 |
+
"loss": 0.9094,
|
917 |
+
"step": 2340
|
918 |
+
},
|
919 |
+
{
|
920 |
+
"epoch": 25.93,
|
921 |
+
"learning_rate": 0.00021480392156862746,
|
922 |
+
"loss": 0.8879,
|
923 |
+
"step": 2360
|
924 |
+
},
|
925 |
+
{
|
926 |
+
"epoch": 26.15,
|
927 |
+
"learning_rate": 0.00021284313725490196,
|
928 |
+
"loss": 0.929,
|
929 |
+
"step": 2380
|
930 |
+
},
|
931 |
+
{
|
932 |
+
"epoch": 26.37,
|
933 |
+
"learning_rate": 0.00021088235294117647,
|
934 |
+
"loss": 0.8942,
|
935 |
+
"step": 2400
|
936 |
+
},
|
937 |
+
{
|
938 |
+
"epoch": 26.37,
|
939 |
+
"eval_loss": 0.16744859516620636,
|
940 |
+
"eval_runtime": 190.6796,
|
941 |
+
"eval_samples_per_second": 25.399,
|
942 |
+
"eval_steps_per_second": 0.797,
|
943 |
+
"eval_wer": 0.14916264547260857,
|
944 |
+
"step": 2400
|
945 |
+
},
|
946 |
+
{
|
947 |
+
"epoch": 26.59,
|
948 |
+
"learning_rate": 0.000208921568627451,
|
949 |
+
"loss": 0.8717,
|
950 |
+
"step": 2420
|
951 |
+
},
|
952 |
+
{
|
953 |
+
"epoch": 26.81,
|
954 |
+
"learning_rate": 0.0002069607843137255,
|
955 |
+
"loss": 0.8952,
|
956 |
+
"step": 2440
|
957 |
+
},
|
958 |
+
{
|
959 |
+
"epoch": 27.03,
|
960 |
+
"learning_rate": 0.000205,
|
961 |
+
"loss": 0.8849,
|
962 |
+
"step": 2460
|
963 |
+
},
|
964 |
+
{
|
965 |
+
"epoch": 27.25,
|
966 |
+
"learning_rate": 0.00020313725490196078,
|
967 |
+
"loss": 0.8781,
|
968 |
+
"step": 2480
|
969 |
+
},
|
970 |
+
{
|
971 |
+
"epoch": 27.47,
|
972 |
+
"learning_rate": 0.0002011764705882353,
|
973 |
+
"loss": 0.8701,
|
974 |
+
"step": 2500
|
975 |
+
},
|
976 |
+
{
|
977 |
+
"epoch": 27.47,
|
978 |
+
"eval_loss": 0.1738910973072052,
|
979 |
+
"eval_runtime": 187.7205,
|
980 |
+
"eval_samples_per_second": 25.799,
|
981 |
+
"eval_steps_per_second": 0.81,
|
982 |
+
"eval_wer": 0.1511779733181947,
|
983 |
+
"step": 2500
|
984 |
+
},
|
985 |
+
{
|
986 |
+
"epoch": 27.69,
|
987 |
+
"learning_rate": 0.0001992156862745098,
|
988 |
+
"loss": 0.8674,
|
989 |
+
"step": 2520
|
990 |
+
},
|
991 |
+
{
|
992 |
+
"epoch": 27.91,
|
993 |
+
"learning_rate": 0.00019725490196078433,
|
994 |
+
"loss": 0.8752,
|
995 |
+
"step": 2540
|
996 |
+
},
|
997 |
+
{
|
998 |
+
"epoch": 28.13,
|
999 |
+
"learning_rate": 0.0001952941176470588,
|
1000 |
+
"loss": 0.91,
|
1001 |
+
"step": 2560
|
1002 |
+
},
|
1003 |
+
{
|
1004 |
+
"epoch": 28.35,
|
1005 |
+
"learning_rate": 0.00019333333333333333,
|
1006 |
+
"loss": 0.8693,
|
1007 |
+
"step": 2580
|
1008 |
+
},
|
1009 |
+
{
|
1010 |
+
"epoch": 28.57,
|
1011 |
+
"learning_rate": 0.00019137254901960786,
|
1012 |
+
"loss": 0.8555,
|
1013 |
+
"step": 2600
|
1014 |
+
},
|
1015 |
+
{
|
1016 |
+
"epoch": 28.57,
|
1017 |
+
"eval_loss": 0.1689654141664505,
|
1018 |
+
"eval_runtime": 196.2665,
|
1019 |
+
"eval_samples_per_second": 24.676,
|
1020 |
+
"eval_steps_per_second": 0.774,
|
1021 |
+
"eval_wer": 0.14459267669599773,
|
1022 |
+
"step": 2600
|
1023 |
}
|
1024 |
],
|
1025 |
"max_steps": 4550,
|
1026 |
"num_train_epochs": 50,
|
1027 |
+
"total_flos": 4.023333138533005e+19,
|
1028 |
"trial_name": null,
|
1029 |
"trial_params": null
|
1030 |
}
|
{checkpoint-1500 β checkpoint-2600}/training_args.bin
RENAMED
File without changes
|
{checkpoint-1600 β checkpoint-2700}/config.json
RENAMED
File without changes
|
{checkpoint-1600 β checkpoint-2700}/optimizer.pt
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 2490337809
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:865f44abdd5045a8acf626b3b992e8fdc983a2d4a3ba5dd6901b27ee83a91484
|
3 |
size 2490337809
|
{checkpoint-1600 β checkpoint-2700}/preprocessor_config.json
RENAMED
File without changes
|
{checkpoint-1500 β checkpoint-2700}/pytorch_model.bin
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1262063089
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7dbbb4678046f14503aa95af2d02c0cc6a2fb7071e468967b463cf71594e2914
|
3 |
size 1262063089
|
{checkpoint-1500 β checkpoint-2700}/rng_state.pth
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9213f1eed5d75326eef26e352d35aa78d567c2885250ab304bce7d59c834157a
|
3 |
+
size 14631
|
{checkpoint-1500 β checkpoint-2700}/scaler.pt
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 559
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7970626d52baf6e9c965f8503c1a6318b3c55b1f5ba73fdd8b8209e28d5d529e
|
3 |
size 559
|
{checkpoint-1600 β checkpoint-2700}/scheduler.pt
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 623
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:89b50e7b743fe5caf734a580430810fca62d132f602e2a17e8e075ce20b82717
|
3 |
size 623
|
{checkpoint-1500 β checkpoint-2700}/trainer_state.json
RENAMED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch":
|
5 |
-
"global_step":
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
@@ -591,11 +591,479 @@
|
|
591 |
"eval_steps_per_second": 0.804,
|
592 |
"eval_wer": 0.18944081748509792,
|
593 |
"step": 1500
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
594 |
}
|
595 |
],
|
596 |
"max_steps": 4550,
|
597 |
"num_train_epochs": 50,
|
598 |
-
"total_flos":
|
599 |
"trial_name": null,
|
600 |
"trial_params": null
|
601 |
}
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 29.666666666666668,
|
5 |
+
"global_step": 2700,
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
|
|
591 |
"eval_steps_per_second": 0.804,
|
592 |
"eval_wer": 0.18944081748509792,
|
593 |
"step": 1500
|
594 |
+
},
|
595 |
+
{
|
596 |
+
"epoch": 16.7,
|
597 |
+
"learning_rate": 0.00019,
|
598 |
+
"loss": 1.0204,
|
599 |
+
"step": 1520
|
600 |
+
},
|
601 |
+
{
|
602 |
+
"epoch": 16.92,
|
603 |
+
"learning_rate": 0.00019250000000000002,
|
604 |
+
"loss": 1.0356,
|
605 |
+
"step": 1540
|
606 |
+
},
|
607 |
+
{
|
608 |
+
"epoch": 17.14,
|
609 |
+
"learning_rate": 0.00019500000000000002,
|
610 |
+
"loss": 1.0538,
|
611 |
+
"step": 1560
|
612 |
+
},
|
613 |
+
{
|
614 |
+
"epoch": 17.36,
|
615 |
+
"learning_rate": 0.0001975,
|
616 |
+
"loss": 1.0228,
|
617 |
+
"step": 1580
|
618 |
+
},
|
619 |
+
{
|
620 |
+
"epoch": 17.58,
|
621 |
+
"learning_rate": 0.0002,
|
622 |
+
"loss": 1.0193,
|
623 |
+
"step": 1600
|
624 |
+
},
|
625 |
+
{
|
626 |
+
"epoch": 17.58,
|
627 |
+
"eval_loss": 0.1991206556558609,
|
628 |
+
"eval_runtime": 191.1717,
|
629 |
+
"eval_samples_per_second": 25.333,
|
630 |
+
"eval_steps_per_second": 0.795,
|
631 |
+
"eval_wer": 0.17885325007096226,
|
632 |
+
"step": 1600
|
633 |
+
},
|
634 |
+
{
|
635 |
+
"epoch": 17.8,
|
636 |
+
"learning_rate": 0.00020250000000000002,
|
637 |
+
"loss": 1.0322,
|
638 |
+
"step": 1620
|
639 |
+
},
|
640 |
+
{
|
641 |
+
"epoch": 18.02,
|
642 |
+
"learning_rate": 0.000205,
|
643 |
+
"loss": 1.0176,
|
644 |
+
"step": 1640
|
645 |
+
},
|
646 |
+
{
|
647 |
+
"epoch": 18.24,
|
648 |
+
"learning_rate": 0.0002075,
|
649 |
+
"loss": 1.0272,
|
650 |
+
"step": 1660
|
651 |
+
},
|
652 |
+
{
|
653 |
+
"epoch": 18.46,
|
654 |
+
"learning_rate": 0.00021,
|
655 |
+
"loss": 0.9675,
|
656 |
+
"step": 1680
|
657 |
+
},
|
658 |
+
{
|
659 |
+
"epoch": 18.68,
|
660 |
+
"learning_rate": 0.0002125,
|
661 |
+
"loss": 0.9816,
|
662 |
+
"step": 1700
|
663 |
+
},
|
664 |
+
{
|
665 |
+
"epoch": 18.68,
|
666 |
+
"eval_loss": 0.19402356445789337,
|
667 |
+
"eval_runtime": 194.75,
|
668 |
+
"eval_samples_per_second": 24.868,
|
669 |
+
"eval_steps_per_second": 0.78,
|
670 |
+
"eval_wer": 0.18010218563724098,
|
671 |
+
"step": 1700
|
672 |
+
},
|
673 |
+
{
|
674 |
+
"epoch": 18.9,
|
675 |
+
"learning_rate": 0.000215,
|
676 |
+
"loss": 1.0023,
|
677 |
+
"step": 1720
|
678 |
+
},
|
679 |
+
{
|
680 |
+
"epoch": 19.12,
|
681 |
+
"learning_rate": 0.0002175,
|
682 |
+
"loss": 1.013,
|
683 |
+
"step": 1740
|
684 |
+
},
|
685 |
+
{
|
686 |
+
"epoch": 19.34,
|
687 |
+
"learning_rate": 0.00022,
|
688 |
+
"loss": 0.9664,
|
689 |
+
"step": 1760
|
690 |
+
},
|
691 |
+
{
|
692 |
+
"epoch": 19.56,
|
693 |
+
"learning_rate": 0.00022250000000000001,
|
694 |
+
"loss": 0.9736,
|
695 |
+
"step": 1780
|
696 |
+
},
|
697 |
+
{
|
698 |
+
"epoch": 19.78,
|
699 |
+
"learning_rate": 0.00022500000000000002,
|
700 |
+
"loss": 0.9814,
|
701 |
+
"step": 1800
|
702 |
+
},
|
703 |
+
{
|
704 |
+
"epoch": 19.78,
|
705 |
+
"eval_loss": 0.18596723675727844,
|
706 |
+
"eval_runtime": 188.4639,
|
707 |
+
"eval_samples_per_second": 25.697,
|
708 |
+
"eval_steps_per_second": 0.807,
|
709 |
+
"eval_wer": 0.16667612829974454,
|
710 |
+
"step": 1800
|
711 |
+
},
|
712 |
+
{
|
713 |
+
"epoch": 19.99,
|
714 |
+
"learning_rate": 0.0002275,
|
715 |
+
"loss": 1.0064,
|
716 |
+
"step": 1820
|
717 |
+
},
|
718 |
+
{
|
719 |
+
"epoch": 20.22,
|
720 |
+
"learning_rate": 0.00023,
|
721 |
+
"loss": 0.9583,
|
722 |
+
"step": 1840
|
723 |
+
},
|
724 |
+
{
|
725 |
+
"epoch": 20.44,
|
726 |
+
"learning_rate": 0.0002325,
|
727 |
+
"loss": 0.9646,
|
728 |
+
"step": 1860
|
729 |
+
},
|
730 |
+
{
|
731 |
+
"epoch": 20.66,
|
732 |
+
"learning_rate": 0.000235,
|
733 |
+
"loss": 0.9762,
|
734 |
+
"step": 1880
|
735 |
+
},
|
736 |
+
{
|
737 |
+
"epoch": 20.87,
|
738 |
+
"learning_rate": 0.0002375,
|
739 |
+
"loss": 0.9787,
|
740 |
+
"step": 1900
|
741 |
+
},
|
742 |
+
{
|
743 |
+
"epoch": 20.87,
|
744 |
+
"eval_loss": 0.18878202140331268,
|
745 |
+
"eval_runtime": 188.244,
|
746 |
+
"eval_samples_per_second": 25.727,
|
747 |
+
"eval_steps_per_second": 0.807,
|
748 |
+
"eval_wer": 0.16420664206642066,
|
749 |
+
"step": 1900
|
750 |
+
},
|
751 |
+
{
|
752 |
+
"epoch": 21.1,
|
753 |
+
"learning_rate": 0.00024,
|
754 |
+
"loss": 1.0218,
|
755 |
+
"step": 1920
|
756 |
+
},
|
757 |
+
{
|
758 |
+
"epoch": 21.32,
|
759 |
+
"learning_rate": 0.00024249999999999999,
|
760 |
+
"loss": 0.9505,
|
761 |
+
"step": 1940
|
762 |
+
},
|
763 |
+
{
|
764 |
+
"epoch": 21.54,
|
765 |
+
"learning_rate": 0.000245,
|
766 |
+
"loss": 0.9554,
|
767 |
+
"step": 1960
|
768 |
+
},
|
769 |
+
{
|
770 |
+
"epoch": 21.75,
|
771 |
+
"learning_rate": 0.0002475,
|
772 |
+
"loss": 0.9728,
|
773 |
+
"step": 1980
|
774 |
+
},
|
775 |
+
{
|
776 |
+
"epoch": 21.97,
|
777 |
+
"learning_rate": 0.00025,
|
778 |
+
"loss": 0.9699,
|
779 |
+
"step": 2000
|
780 |
+
},
|
781 |
+
{
|
782 |
+
"epoch": 21.97,
|
783 |
+
"eval_loss": 0.18748582899570465,
|
784 |
+
"eval_runtime": 190.7875,
|
785 |
+
"eval_samples_per_second": 25.384,
|
786 |
+
"eval_steps_per_second": 0.797,
|
787 |
+
"eval_wer": 0.17042293499858074,
|
788 |
+
"step": 2000
|
789 |
+
},
|
790 |
+
{
|
791 |
+
"epoch": 22.2,
|
792 |
+
"learning_rate": 0.00024803921568627453,
|
793 |
+
"loss": 0.9624,
|
794 |
+
"step": 2020
|
795 |
+
},
|
796 |
+
{
|
797 |
+
"epoch": 22.42,
|
798 |
+
"learning_rate": 0.000246078431372549,
|
799 |
+
"loss": 0.9419,
|
800 |
+
"step": 2040
|
801 |
+
},
|
802 |
+
{
|
803 |
+
"epoch": 22.63,
|
804 |
+
"learning_rate": 0.00024411764705882354,
|
805 |
+
"loss": 0.9563,
|
806 |
+
"step": 2060
|
807 |
+
},
|
808 |
+
{
|
809 |
+
"epoch": 22.85,
|
810 |
+
"learning_rate": 0.00024215686274509804,
|
811 |
+
"loss": 0.9643,
|
812 |
+
"step": 2080
|
813 |
+
},
|
814 |
+
{
|
815 |
+
"epoch": 23.08,
|
816 |
+
"learning_rate": 0.00024019607843137256,
|
817 |
+
"loss": 0.9616,
|
818 |
+
"step": 2100
|
819 |
+
},
|
820 |
+
{
|
821 |
+
"epoch": 23.08,
|
822 |
+
"eval_loss": 0.18017500638961792,
|
823 |
+
"eval_runtime": 191.3932,
|
824 |
+
"eval_samples_per_second": 25.304,
|
825 |
+
"eval_steps_per_second": 0.794,
|
826 |
+
"eval_wer": 0.16173715583309678,
|
827 |
+
"step": 2100
|
828 |
+
},
|
829 |
+
{
|
830 |
+
"epoch": 23.3,
|
831 |
+
"learning_rate": 0.00023823529411764704,
|
832 |
+
"loss": 0.917,
|
833 |
+
"step": 2120
|
834 |
+
},
|
835 |
+
{
|
836 |
+
"epoch": 23.51,
|
837 |
+
"learning_rate": 0.00023627450980392157,
|
838 |
+
"loss": 0.945,
|
839 |
+
"step": 2140
|
840 |
+
},
|
841 |
+
{
|
842 |
+
"epoch": 23.73,
|
843 |
+
"learning_rate": 0.0002343137254901961,
|
844 |
+
"loss": 0.9243,
|
845 |
+
"step": 2160
|
846 |
+
},
|
847 |
+
{
|
848 |
+
"epoch": 23.95,
|
849 |
+
"learning_rate": 0.0002323529411764706,
|
850 |
+
"loss": 0.9288,
|
851 |
+
"step": 2180
|
852 |
+
},
|
853 |
+
{
|
854 |
+
"epoch": 24.17,
|
855 |
+
"learning_rate": 0.0002303921568627451,
|
856 |
+
"loss": 0.9378,
|
857 |
+
"step": 2200
|
858 |
+
},
|
859 |
+
{
|
860 |
+
"epoch": 24.17,
|
861 |
+
"eval_loss": 0.17928896844387054,
|
862 |
+
"eval_runtime": 189.7619,
|
863 |
+
"eval_samples_per_second": 25.521,
|
864 |
+
"eval_steps_per_second": 0.801,
|
865 |
+
"eval_wer": 0.1577348850411581,
|
866 |
+
"step": 2200
|
867 |
+
},
|
868 |
+
{
|
869 |
+
"epoch": 24.39,
|
870 |
+
"learning_rate": 0.0002284313725490196,
|
871 |
+
"loss": 0.9071,
|
872 |
+
"step": 2220
|
873 |
+
},
|
874 |
+
{
|
875 |
+
"epoch": 24.61,
|
876 |
+
"learning_rate": 0.00022647058823529412,
|
877 |
+
"loss": 0.9054,
|
878 |
+
"step": 2240
|
879 |
+
},
|
880 |
+
{
|
881 |
+
"epoch": 24.83,
|
882 |
+
"learning_rate": 0.0002246078431372549,
|
883 |
+
"loss": 0.9303,
|
884 |
+
"step": 2260
|
885 |
+
},
|
886 |
+
{
|
887 |
+
"epoch": 25.05,
|
888 |
+
"learning_rate": 0.00022264705882352943,
|
889 |
+
"loss": 0.9376,
|
890 |
+
"step": 2280
|
891 |
+
},
|
892 |
+
{
|
893 |
+
"epoch": 25.27,
|
894 |
+
"learning_rate": 0.0002206862745098039,
|
895 |
+
"loss": 0.888,
|
896 |
+
"step": 2300
|
897 |
+
},
|
898 |
+
{
|
899 |
+
"epoch": 25.27,
|
900 |
+
"eval_loss": 0.17642559111118317,
|
901 |
+
"eval_runtime": 187.0437,
|
902 |
+
"eval_samples_per_second": 25.892,
|
903 |
+
"eval_steps_per_second": 0.813,
|
904 |
+
"eval_wer": 0.15452739142776042,
|
905 |
+
"step": 2300
|
906 |
+
},
|
907 |
+
{
|
908 |
+
"epoch": 25.49,
|
909 |
+
"learning_rate": 0.00021872549019607843,
|
910 |
+
"loss": 0.9135,
|
911 |
+
"step": 2320
|
912 |
+
},
|
913 |
+
{
|
914 |
+
"epoch": 25.71,
|
915 |
+
"learning_rate": 0.00021676470588235294,
|
916 |
+
"loss": 0.9094,
|
917 |
+
"step": 2340
|
918 |
+
},
|
919 |
+
{
|
920 |
+
"epoch": 25.93,
|
921 |
+
"learning_rate": 0.00021480392156862746,
|
922 |
+
"loss": 0.8879,
|
923 |
+
"step": 2360
|
924 |
+
},
|
925 |
+
{
|
926 |
+
"epoch": 26.15,
|
927 |
+
"learning_rate": 0.00021284313725490196,
|
928 |
+
"loss": 0.929,
|
929 |
+
"step": 2380
|
930 |
+
},
|
931 |
+
{
|
932 |
+
"epoch": 26.37,
|
933 |
+
"learning_rate": 0.00021088235294117647,
|
934 |
+
"loss": 0.8942,
|
935 |
+
"step": 2400
|
936 |
+
},
|
937 |
+
{
|
938 |
+
"epoch": 26.37,
|
939 |
+
"eval_loss": 0.16744859516620636,
|
940 |
+
"eval_runtime": 190.6796,
|
941 |
+
"eval_samples_per_second": 25.399,
|
942 |
+
"eval_steps_per_second": 0.797,
|
943 |
+
"eval_wer": 0.14916264547260857,
|
944 |
+
"step": 2400
|
945 |
+
},
|
946 |
+
{
|
947 |
+
"epoch": 26.59,
|
948 |
+
"learning_rate": 0.000208921568627451,
|
949 |
+
"loss": 0.8717,
|
950 |
+
"step": 2420
|
951 |
+
},
|
952 |
+
{
|
953 |
+
"epoch": 26.81,
|
954 |
+
"learning_rate": 0.0002069607843137255,
|
955 |
+
"loss": 0.8952,
|
956 |
+
"step": 2440
|
957 |
+
},
|
958 |
+
{
|
959 |
+
"epoch": 27.03,
|
960 |
+
"learning_rate": 0.000205,
|
961 |
+
"loss": 0.8849,
|
962 |
+
"step": 2460
|
963 |
+
},
|
964 |
+
{
|
965 |
+
"epoch": 27.25,
|
966 |
+
"learning_rate": 0.00020313725490196078,
|
967 |
+
"loss": 0.8781,
|
968 |
+
"step": 2480
|
969 |
+
},
|
970 |
+
{
|
971 |
+
"epoch": 27.47,
|
972 |
+
"learning_rate": 0.0002011764705882353,
|
973 |
+
"loss": 0.8701,
|
974 |
+
"step": 2500
|
975 |
+
},
|
976 |
+
{
|
977 |
+
"epoch": 27.47,
|
978 |
+
"eval_loss": 0.1738910973072052,
|
979 |
+
"eval_runtime": 187.7205,
|
980 |
+
"eval_samples_per_second": 25.799,
|
981 |
+
"eval_steps_per_second": 0.81,
|
982 |
+
"eval_wer": 0.1511779733181947,
|
983 |
+
"step": 2500
|
984 |
+
},
|
985 |
+
{
|
986 |
+
"epoch": 27.69,
|
987 |
+
"learning_rate": 0.0001992156862745098,
|
988 |
+
"loss": 0.8674,
|
989 |
+
"step": 2520
|
990 |
+
},
|
991 |
+
{
|
992 |
+
"epoch": 27.91,
|
993 |
+
"learning_rate": 0.00019725490196078433,
|
994 |
+
"loss": 0.8752,
|
995 |
+
"step": 2540
|
996 |
+
},
|
997 |
+
{
|
998 |
+
"epoch": 28.13,
|
999 |
+
"learning_rate": 0.0001952941176470588,
|
1000 |
+
"loss": 0.91,
|
1001 |
+
"step": 2560
|
1002 |
+
},
|
1003 |
+
{
|
1004 |
+
"epoch": 28.35,
|
1005 |
+
"learning_rate": 0.00019333333333333333,
|
1006 |
+
"loss": 0.8693,
|
1007 |
+
"step": 2580
|
1008 |
+
},
|
1009 |
+
{
|
1010 |
+
"epoch": 28.57,
|
1011 |
+
"learning_rate": 0.00019137254901960786,
|
1012 |
+
"loss": 0.8555,
|
1013 |
+
"step": 2600
|
1014 |
+
},
|
1015 |
+
{
|
1016 |
+
"epoch": 28.57,
|
1017 |
+
"eval_loss": 0.1689654141664505,
|
1018 |
+
"eval_runtime": 196.2665,
|
1019 |
+
"eval_samples_per_second": 24.676,
|
1020 |
+
"eval_steps_per_second": 0.774,
|
1021 |
+
"eval_wer": 0.14459267669599773,
|
1022 |
+
"step": 2600
|
1023 |
+
},
|
1024 |
+
{
|
1025 |
+
"epoch": 28.79,
|
1026 |
+
"learning_rate": 0.00018941176470588236,
|
1027 |
+
"loss": 0.8796,
|
1028 |
+
"step": 2620
|
1029 |
+
},
|
1030 |
+
{
|
1031 |
+
"epoch": 29.01,
|
1032 |
+
"learning_rate": 0.00018745098039215686,
|
1033 |
+
"loss": 0.9045,
|
1034 |
+
"step": 2640
|
1035 |
+
},
|
1036 |
+
{
|
1037 |
+
"epoch": 29.23,
|
1038 |
+
"learning_rate": 0.00018549019607843137,
|
1039 |
+
"loss": 0.8515,
|
1040 |
+
"step": 2660
|
1041 |
+
},
|
1042 |
+
{
|
1043 |
+
"epoch": 29.45,
|
1044 |
+
"learning_rate": 0.0001835294117647059,
|
1045 |
+
"loss": 0.861,
|
1046 |
+
"step": 2680
|
1047 |
+
},
|
1048 |
+
{
|
1049 |
+
"epoch": 29.67,
|
1050 |
+
"learning_rate": 0.0001815686274509804,
|
1051 |
+
"loss": 0.8513,
|
1052 |
+
"step": 2700
|
1053 |
+
},
|
1054 |
+
{
|
1055 |
+
"epoch": 29.67,
|
1056 |
+
"eval_loss": 0.16488835215568542,
|
1057 |
+
"eval_runtime": 189.1938,
|
1058 |
+
"eval_samples_per_second": 25.598,
|
1059 |
+
"eval_steps_per_second": 0.803,
|
1060 |
+
"eval_wer": 0.14774340051092819,
|
1061 |
+
"step": 2700
|
1062 |
}
|
1063 |
],
|
1064 |
"max_steps": 4550,
|
1065 |
"num_train_epochs": 50,
|
1066 |
+
"total_flos": 4.176183820424104e+19,
|
1067 |
"trial_name": null,
|
1068 |
"trial_params": null
|
1069 |
}
|
{checkpoint-1600 β checkpoint-2700}/training_args.bin
RENAMED
File without changes
|