Training in progress, step 1850, checkpoint
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:8a44dfb5e264781c6f6c2ec17953b55c8d56028cea17c18c1ea00e1a273ca0df
 size 1370666272
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:64d8002ffbd9ad944ead17d83c487490d6c027e65dfe6f984e192a6959e76693
 size 697294462
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:1839c408b2800d1f16254de5db0d477776bbfae78a9c676838bcb325c436cdf1
 size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.
+  "epoch": 0.4293837762562377,
   "eval_steps": 500,
-  "global_step":
+  "global_step": 1850,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -12607,6 +12607,356 @@
       "learning_rate": 0.00019658880674543004,
       "loss": 0.9393,
       "step": 1800
+    },
+    {
+      "epoch": 0.4180109086689103,
+      "grad_norm": 0.5023618936538696,
+      "learning_rate": 0.00019658502880731609,
+      "loss": 0.8936,
+      "step": 1801
+    },
+    {
+      "epoch": 0.4182430080074272,
+      "grad_norm": 0.7162852883338928,
+      "learning_rate": 0.00019658124881464525,
+      "loss": 0.9267,
+      "step": 1802
+    },
+    {
+      "epoch": 0.41847510734594406,
+      "grad_norm": 0.6218928694725037,
+      "learning_rate": 0.000196577466767498,
+      "loss": 0.9263,
+      "step": 1803
+    },
+    {
+      "epoch": 0.41870720668446093,
+      "grad_norm": 0.5862722396850586,
+      "learning_rate": 0.00019657368266595476,
+      "loss": 0.8879,
+      "step": 1804
+    },
+    {
+      "epoch": 0.41893930602297785,
+      "grad_norm": 0.5510787963867188,
+      "learning_rate": 0.00019656989651009607,
+      "loss": 0.9052,
+      "step": 1805
+    },
+    {
+      "epoch": 0.4191714053614947,
+      "grad_norm": 0.6477943062782288,
+      "learning_rate": 0.00019656610830000242,
+      "loss": 0.8852,
+      "step": 1806
+    },
+    {
+      "epoch": 0.4194035047000116,
+      "grad_norm": 0.5812867283821106,
+      "learning_rate": 0.0001965623180357544,
+      "loss": 0.946,
+      "step": 1807
+    },
+    {
+      "epoch": 0.4196356040385285,
+      "grad_norm": 0.5934841632843018,
+      "learning_rate": 0.00019655852571743264,
+      "loss": 0.9071,
+      "step": 1808
+    },
+    {
+      "epoch": 0.4198677033770454,
+      "grad_norm": 0.5561374425888062,
+      "learning_rate": 0.00019655473134511784,
+      "loss": 0.927,
+      "step": 1809
+    },
+    {
+      "epoch": 0.42009980271556224,
+      "grad_norm": 1.1915708780288696,
+      "learning_rate": 0.00019655093491889068,
+      "loss": 0.8689,
+      "step": 1810
+    },
+    {
+      "epoch": 0.42033190205407917,
+      "grad_norm": 0.6147586107254028,
+      "learning_rate": 0.00019654713643883193,
+      "loss": 0.884,
+      "step": 1811
+    },
+    {
+      "epoch": 0.42056400139259603,
+      "grad_norm": 0.5104571580886841,
+      "learning_rate": 0.00019654333590502234,
+      "loss": 0.9142,
+      "step": 1812
+    },
+    {
+      "epoch": 0.4207961007311129,
+      "grad_norm": 0.5579031705856323,
+      "learning_rate": 0.00019653953331754285,
+      "loss": 0.9324,
+      "step": 1813
+    },
+    {
+      "epoch": 0.4210282000696298,
+      "grad_norm": 0.5296502709388733,
+      "learning_rate": 0.00019653572867647426,
+      "loss": 0.8842,
+      "step": 1814
+    },
+    {
+      "epoch": 0.4212602994081467,
+      "grad_norm": 0.5059155821800232,
+      "learning_rate": 0.0001965319219818976,
+      "loss": 0.9587,
+      "step": 1815
+    },
+    {
+      "epoch": 0.42149239874666355,
+      "grad_norm": 0.565586507320404,
+      "learning_rate": 0.00019652811323389376,
+      "loss": 0.9126,
+      "step": 1816
+    },
+    {
+      "epoch": 0.4217244980851805,
+      "grad_norm": 0.5331727266311646,
+      "learning_rate": 0.00019652430243254377,
+      "loss": 0.9126,
+      "step": 1817
+    },
+    {
+      "epoch": 0.42195659742369734,
+      "grad_norm": 0.677875816822052,
+      "learning_rate": 0.00019652048957792876,
+      "loss": 0.9316,
+      "step": 1818
+    },
+    {
+      "epoch": 0.4221886967622142,
+      "grad_norm": 0.5276561975479126,
+      "learning_rate": 0.00019651667467012978,
+      "loss": 0.8651,
+      "step": 1819
+    },
+    {
+      "epoch": 0.42242079610073113,
+      "grad_norm": 0.5190433859825134,
+      "learning_rate": 0.00019651285770922797,
+      "loss": 0.917,
+      "step": 1820
+    },
+    {
+      "epoch": 0.422652895439248,
+      "grad_norm": 0.5006272792816162,
+      "learning_rate": 0.00019650903869530454,
+      "loss": 0.9645,
+      "step": 1821
+    },
+    {
+      "epoch": 0.42288499477776487,
+      "grad_norm": 0.5366963148117065,
+      "learning_rate": 0.00019650521762844077,
+      "loss": 0.912,
+      "step": 1822
+    },
+    {
+      "epoch": 0.4231170941162818,
+      "grad_norm": 0.4790819585323334,
+      "learning_rate": 0.00019650139450871787,
+      "loss": 0.9122,
+      "step": 1823
+    },
+    {
+      "epoch": 0.42334919345479866,
+      "grad_norm": 0.5402861833572388,
+      "learning_rate": 0.0001964975693362172,
+      "loss": 0.955,
+      "step": 1824
+    },
+    {
+      "epoch": 0.4235812927933155,
+      "grad_norm": 0.5376525521278381,
+      "learning_rate": 0.00019649374211102013,
+      "loss": 0.8837,
+      "step": 1825
+    },
+    {
+      "epoch": 0.42381339213183244,
+      "grad_norm": 0.5535842180252075,
+      "learning_rate": 0.0001964899128332081,
+      "loss": 0.9001,
+      "step": 1826
+    },
+    {
+      "epoch": 0.4240454914703493,
+      "grad_norm": 0.5926379561424255,
+      "learning_rate": 0.0001964860815028625,
+      "loss": 0.9202,
+      "step": 1827
+    },
+    {
+      "epoch": 0.4242775908088662,
+      "grad_norm": 0.547949492931366,
+      "learning_rate": 0.0001964822481200649,
+      "loss": 0.9076,
+      "step": 1828
+    },
+    {
+      "epoch": 0.4245096901473831,
+      "grad_norm": 0.4945877194404602,
+      "learning_rate": 0.00019647841268489682,
+      "loss": 0.9013,
+      "step": 1829
+    },
+    {
+      "epoch": 0.42474178948589997,
+      "grad_norm": 0.4917088449001312,
+      "learning_rate": 0.0001964745751974398,
+      "loss": 0.9332,
+      "step": 1830
+    },
+    {
+      "epoch": 0.42497388882441683,
+      "grad_norm": 0.5218760967254639,
+      "learning_rate": 0.00019647073565777555,
+      "loss": 0.9954,
+      "step": 1831
+    },
+    {
+      "epoch": 0.42520598816293376,
+      "grad_norm": 0.5042544007301331,
+      "learning_rate": 0.00019646689406598567,
+      "loss": 0.9127,
+      "step": 1832
+    },
+    {
+      "epoch": 0.4254380875014506,
+      "grad_norm": 0.4927321672439575,
+      "learning_rate": 0.00019646305042215193,
+      "loss": 0.9304,
+      "step": 1833
+    },
+    {
+      "epoch": 0.4256701868399675,
+      "grad_norm": 0.5180162191390991,
+      "learning_rate": 0.00019645920472635608,
+      "loss": 0.9806,
+      "step": 1834
+    },
+    {
+      "epoch": 0.4259022861784844,
+      "grad_norm": 0.5972525477409363,
+      "learning_rate": 0.0001964553569786799,
+      "loss": 0.8653,
+      "step": 1835
+    },
+    {
+      "epoch": 0.4261343855170013,
+      "grad_norm": 0.5696606636047363,
+      "learning_rate": 0.00019645150717920528,
+      "loss": 0.9044,
+      "step": 1836
+    },
+    {
+      "epoch": 0.42636648485551815,
+      "grad_norm": 0.6031782031059265,
+      "learning_rate": 0.00019644765532801412,
+      "loss": 0.9033,
+      "step": 1837
+    },
+    {
+      "epoch": 0.42659858419403507,
+      "grad_norm": 0.5550394654273987,
+      "learning_rate": 0.0001964438014251883,
+      "loss": 0.9123,
+      "step": 1838
+    },
+    {
+      "epoch": 0.42683068353255194,
+      "grad_norm": 0.6705706119537354,
+      "learning_rate": 0.00019643994547080982,
+      "loss": 0.9248,
+      "step": 1839
+    },
+    {
+      "epoch": 0.4270627828710688,
+      "grad_norm": 0.6391993761062622,
+      "learning_rate": 0.0001964360874649607,
+      "loss": 0.9417,
+      "step": 1840
+    },
+    {
+      "epoch": 0.4272948822095857,
+      "grad_norm": 0.4748203754425049,
+      "learning_rate": 0.0001964322274077231,
+      "loss": 0.9371,
+      "step": 1841
+    },
+    {
+      "epoch": 0.4275269815481026,
+      "grad_norm": 0.6255447864532471,
+      "learning_rate": 0.000196428365299179,
+      "loss": 0.8876,
+      "step": 1842
+    },
+    {
+      "epoch": 0.42775908088661946,
+      "grad_norm": 0.5568214058876038,
+      "learning_rate": 0.00019642450113941057,
+      "loss": 0.9473,
+      "step": 1843
+    },
+    {
+      "epoch": 0.4279911802251364,
+      "grad_norm": 0.609748363494873,
+      "learning_rate": 0.0001964206349285001,
+      "loss": 0.897,
+      "step": 1844
+    },
+    {
+      "epoch": 0.42822327956365325,
+      "grad_norm": 0.6173092722892761,
+      "learning_rate": 0.00019641676666652978,
+      "loss": 0.9228,
+      "step": 1845
+    },
+    {
+      "epoch": 0.4284553789021701,
+      "grad_norm": 0.5989368557929993,
+      "learning_rate": 0.00019641289635358186,
+      "loss": 0.9863,
+      "step": 1846
+    },
+    {
+      "epoch": 0.42868747824068704,
+      "grad_norm": 0.651771605014801,
+      "learning_rate": 0.00019640902398973873,
+      "loss": 0.9606,
+      "step": 1847
+    },
+    {
+      "epoch": 0.4289195775792039,
+      "grad_norm": 0.6313779354095459,
+      "learning_rate": 0.0001964051495750827,
+      "loss": 0.9273,
+      "step": 1848
+    },
+    {
+      "epoch": 0.42915167691772077,
+      "grad_norm": 0.5805069208145142,
+      "learning_rate": 0.00019640127310969628,
+      "loss": 0.8874,
+      "step": 1849
+    },
+    {
+      "epoch": 0.4293837762562377,
+      "grad_norm": 0.6379398107528687,
+      "learning_rate": 0.00019639739459366182,
+      "loss": 0.9533,
+      "step": 1850
     }
   ],
   "logging_steps": 1,
@@ -12626,7 +12976,7 @@
       "attributes": {}
     }
   },
-  "total_flos":
+  "total_flos": 8.211719251820544e+17,
   "train_batch_size": 32,
   "trial_name": null,
   "trial_params": null