mosama committed on
Commit de874de · verified · 1 Parent(s): debf78d

Training in progress, step 1850, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c19a8c4ff78657e1ad6849d03827d74a166332e2c92b5ee4c34966f79e091caa
+oid sha256:8a44dfb5e264781c6f6c2ec17953b55c8d56028cea17c18c1ea00e1a273ca0df
 size 1370666272
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:10c0470d53e83293b301fcaf8b6ed1125194ec8f54fe9618703c1367bf9a41e7
+oid sha256:64d8002ffbd9ad944ead17d83c487490d6c027e65dfe6f984e192a6959e76693
 size 697294462
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d290a2c62404485bacce37c57039bbf078af94d6cf0884b19d2a678f11aec096
+oid sha256:1839c408b2800d1f16254de5db0d477776bbfae78a9c676838bcb325c436cdf1
 size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.4177788093303934,
+  "epoch": 0.4293837762562377,
   "eval_steps": 500,
-  "global_step": 1800,
+  "global_step": 1850,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -12607,6 +12607,356 @@
       "learning_rate": 0.00019658880674543004,
       "loss": 0.9393,
       "step": 1800
+    },
+    {
+      "epoch": 0.4180109086689103,
+      "grad_norm": 0.5023618936538696,
+      "learning_rate": 0.00019658502880731609,
+      "loss": 0.8936,
+      "step": 1801
+    },
+    {
+      "epoch": 0.4182430080074272,
+      "grad_norm": 0.7162852883338928,
+      "learning_rate": 0.00019658124881464525,
+      "loss": 0.9267,
+      "step": 1802
+    },
+    {
+      "epoch": 0.41847510734594406,
+      "grad_norm": 0.6218928694725037,
+      "learning_rate": 0.000196577466767498,
+      "loss": 0.9263,
+      "step": 1803
+    },
+    {
+      "epoch": 0.41870720668446093,
+      "grad_norm": 0.5862722396850586,
+      "learning_rate": 0.00019657368266595476,
+      "loss": 0.8879,
+      "step": 1804
+    },
+    {
+      "epoch": 0.41893930602297785,
+      "grad_norm": 0.5510787963867188,
+      "learning_rate": 0.00019656989651009607,
+      "loss": 0.9052,
+      "step": 1805
+    },
+    {
+      "epoch": 0.4191714053614947,
+      "grad_norm": 0.6477943062782288,
+      "learning_rate": 0.00019656610830000242,
+      "loss": 0.8852,
+      "step": 1806
+    },
+    {
+      "epoch": 0.4194035047000116,
+      "grad_norm": 0.5812867283821106,
+      "learning_rate": 0.0001965623180357544,
+      "loss": 0.946,
+      "step": 1807
+    },
+    {
+      "epoch": 0.4196356040385285,
+      "grad_norm": 0.5934841632843018,
+      "learning_rate": 0.00019655852571743264,
+      "loss": 0.9071,
+      "step": 1808
+    },
+    {
+      "epoch": 0.4198677033770454,
+      "grad_norm": 0.5561374425888062,
+      "learning_rate": 0.00019655473134511784,
+      "loss": 0.927,
+      "step": 1809
+    },
+    {
+      "epoch": 0.42009980271556224,
+      "grad_norm": 1.1915708780288696,
+      "learning_rate": 0.00019655093491889068,
+      "loss": 0.8689,
+      "step": 1810
+    },
+    {
+      "epoch": 0.42033190205407917,
+      "grad_norm": 0.6147586107254028,
+      "learning_rate": 0.00019654713643883193,
+      "loss": 0.884,
+      "step": 1811
+    },
+    {
+      "epoch": 0.42056400139259603,
+      "grad_norm": 0.5104571580886841,
+      "learning_rate": 0.00019654333590502234,
+      "loss": 0.9142,
+      "step": 1812
+    },
+    {
+      "epoch": 0.4207961007311129,
+      "grad_norm": 0.5579031705856323,
+      "learning_rate": 0.00019653953331754285,
+      "loss": 0.9324,
+      "step": 1813
+    },
+    {
+      "epoch": 0.4210282000696298,
+      "grad_norm": 0.5296502709388733,
+      "learning_rate": 0.00019653572867647426,
+      "loss": 0.8842,
+      "step": 1814
+    },
+    {
+      "epoch": 0.4212602994081467,
+      "grad_norm": 0.5059155821800232,
+      "learning_rate": 0.0001965319219818976,
+      "loss": 0.9587,
+      "step": 1815
+    },
+    {
+      "epoch": 0.42149239874666355,
+      "grad_norm": 0.565586507320404,
+      "learning_rate": 0.00019652811323389376,
+      "loss": 0.9126,
+      "step": 1816
+    },
+    {
+      "epoch": 0.4217244980851805,
+      "grad_norm": 0.5331727266311646,
+      "learning_rate": 0.00019652430243254377,
+      "loss": 0.9126,
+      "step": 1817
+    },
+    {
+      "epoch": 0.42195659742369734,
+      "grad_norm": 0.677875816822052,
+      "learning_rate": 0.00019652048957792876,
+      "loss": 0.9316,
+      "step": 1818
+    },
+    {
+      "epoch": 0.4221886967622142,
+      "grad_norm": 0.5276561975479126,
+      "learning_rate": 0.00019651667467012978,
+      "loss": 0.8651,
+      "step": 1819
+    },
+    {
+      "epoch": 0.42242079610073113,
+      "grad_norm": 0.5190433859825134,
+      "learning_rate": 0.00019651285770922797,
+      "loss": 0.917,
+      "step": 1820
+    },
+    {
+      "epoch": 0.422652895439248,
+      "grad_norm": 0.5006272792816162,
+      "learning_rate": 0.00019650903869530454,
+      "loss": 0.9645,
+      "step": 1821
+    },
+    {
+      "epoch": 0.42288499477776487,
+      "grad_norm": 0.5366963148117065,
+      "learning_rate": 0.00019650521762844077,
+      "loss": 0.912,
+      "step": 1822
+    },
+    {
+      "epoch": 0.4231170941162818,
+      "grad_norm": 0.4790819585323334,
+      "learning_rate": 0.00019650139450871787,
+      "loss": 0.9122,
+      "step": 1823
+    },
+    {
+      "epoch": 0.42334919345479866,
+      "grad_norm": 0.5402861833572388,
+      "learning_rate": 0.0001964975693362172,
+      "loss": 0.955,
+      "step": 1824
+    },
+    {
+      "epoch": 0.4235812927933155,
+      "grad_norm": 0.5376525521278381,
+      "learning_rate": 0.00019649374211102013,
+      "loss": 0.8837,
+      "step": 1825
+    },
+    {
+      "epoch": 0.42381339213183244,
+      "grad_norm": 0.5535842180252075,
+      "learning_rate": 0.0001964899128332081,
+      "loss": 0.9001,
+      "step": 1826
+    },
+    {
+      "epoch": 0.4240454914703493,
+      "grad_norm": 0.5926379561424255,
+      "learning_rate": 0.0001964860815028625,
+      "loss": 0.9202,
+      "step": 1827
+    },
+    {
+      "epoch": 0.4242775908088662,
+      "grad_norm": 0.547949492931366,
+      "learning_rate": 0.0001964822481200649,
+      "loss": 0.9076,
+      "step": 1828
+    },
+    {
+      "epoch": 0.4245096901473831,
+      "grad_norm": 0.4945877194404602,
+      "learning_rate": 0.00019647841268489682,
+      "loss": 0.9013,
+      "step": 1829
+    },
+    {
+      "epoch": 0.42474178948589997,
+      "grad_norm": 0.4917088449001312,
+      "learning_rate": 0.0001964745751974398,
+      "loss": 0.9332,
+      "step": 1830
+    },
+    {
+      "epoch": 0.42497388882441683,
+      "grad_norm": 0.5218760967254639,
+      "learning_rate": 0.00019647073565777555,
+      "loss": 0.9954,
+      "step": 1831
+    },
+    {
+      "epoch": 0.42520598816293376,
+      "grad_norm": 0.5042544007301331,
+      "learning_rate": 0.00019646689406598567,
+      "loss": 0.9127,
+      "step": 1832
+    },
+    {
+      "epoch": 0.4254380875014506,
+      "grad_norm": 0.4927321672439575,
+      "learning_rate": 0.00019646305042215193,
+      "loss": 0.9304,
+      "step": 1833
+    },
+    {
+      "epoch": 0.4256701868399675,
+      "grad_norm": 0.5180162191390991,
+      "learning_rate": 0.00019645920472635608,
+      "loss": 0.9806,
+      "step": 1834
+    },
+    {
+      "epoch": 0.4259022861784844,
+      "grad_norm": 0.5972525477409363,
+      "learning_rate": 0.0001964553569786799,
+      "loss": 0.8653,
+      "step": 1835
+    },
+    {
+      "epoch": 0.4261343855170013,
+      "grad_norm": 0.5696606636047363,
+      "learning_rate": 0.00019645150717920528,
+      "loss": 0.9044,
+      "step": 1836
+    },
+    {
+      "epoch": 0.42636648485551815,
+      "grad_norm": 0.6031782031059265,
+      "learning_rate": 0.00019644765532801412,
+      "loss": 0.9033,
+      "step": 1837
+    },
+    {
+      "epoch": 0.42659858419403507,
+      "grad_norm": 0.5550394654273987,
+      "learning_rate": 0.0001964438014251883,
+      "loss": 0.9123,
+      "step": 1838
+    },
+    {
+      "epoch": 0.42683068353255194,
+      "grad_norm": 0.6705706119537354,
+      "learning_rate": 0.00019643994547080982,
+      "loss": 0.9248,
+      "step": 1839
+    },
+    {
+      "epoch": 0.4270627828710688,
+      "grad_norm": 0.6391993761062622,
+      "learning_rate": 0.0001964360874649607,
+      "loss": 0.9417,
+      "step": 1840
+    },
+    {
+      "epoch": 0.4272948822095857,
+      "grad_norm": 0.4748203754425049,
+      "learning_rate": 0.0001964322274077231,
+      "loss": 0.9371,
+      "step": 1841
+    },
+    {
+      "epoch": 0.4275269815481026,
+      "grad_norm": 0.6255447864532471,
+      "learning_rate": 0.000196428365299179,
+      "loss": 0.8876,
+      "step": 1842
+    },
+    {
+      "epoch": 0.42775908088661946,
+      "grad_norm": 0.5568214058876038,
+      "learning_rate": 0.00019642450113941057,
+      "loss": 0.9473,
+      "step": 1843
+    },
+    {
+      "epoch": 0.4279911802251364,
+      "grad_norm": 0.609748363494873,
+      "learning_rate": 0.0001964206349285001,
+      "loss": 0.897,
+      "step": 1844
+    },
+    {
+      "epoch": 0.42822327956365325,
+      "grad_norm": 0.6173092722892761,
+      "learning_rate": 0.00019641676666652978,
+      "loss": 0.9228,
+      "step": 1845
+    },
+    {
+      "epoch": 0.4284553789021701,
+      "grad_norm": 0.5989368557929993,
+      "learning_rate": 0.00019641289635358186,
+      "loss": 0.9863,
+      "step": 1846
+    },
+    {
+      "epoch": 0.42868747824068704,
+      "grad_norm": 0.651771605014801,
+      "learning_rate": 0.00019640902398973873,
+      "loss": 0.9606,
+      "step": 1847
+    },
+    {
+      "epoch": 0.4289195775792039,
+      "grad_norm": 0.6313779354095459,
+      "learning_rate": 0.0001964051495750827,
+      "loss": 0.9273,
+      "step": 1848
+    },
+    {
+      "epoch": 0.42915167691772077,
+      "grad_norm": 0.5805069208145142,
+      "learning_rate": 0.00019640127310969628,
+      "loss": 0.8874,
+      "step": 1849
+    },
+    {
+      "epoch": 0.4293837762562377,
+      "grad_norm": 0.6379398107528687,
+      "learning_rate": 0.00019639739459366182,
+      "loss": 0.9533,
+      "step": 1850
     }
   ],
   "logging_steps": 1,
@@ -12626,7 +12976,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 7.989780893663232e+17,
+  "total_flos": 8.211719251820544e+17,
   "train_batch_size": 32,
   "trial_name": null,
   "trial_params": null