cline_emanuals / trainer_state.json
AnonymousSub's picture
first commit
d732f01
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.15092095071321246,
"global_step": 204000,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.000739808581927512,
"learning_rate": 1e-05,
"loss": 9.4473583984375,
"step": 1000
},
{
"epoch": 0.001479617163855024,
"learning_rate": 2e-05,
"loss": 4.761673828125,
"step": 2000
},
{
"epoch": 0.002219425745782536,
"learning_rate": 3e-05,
"loss": 3.3295283203125,
"step": 3000
},
{
"epoch": 0.002959234327710048,
"learning_rate": 4e-05,
"loss": 2.793072265625,
"step": 4000
},
{
"epoch": 0.00369904290963756,
"learning_rate": 5e-05,
"loss": 2.51521875,
"step": 5000
},
{
"epoch": 0.004438851491565072,
"learning_rate": 6e-05,
"loss": 2.3833671875,
"step": 6000
},
{
"epoch": 0.005178660073492584,
"learning_rate": 7e-05,
"loss": 2.28459765625,
"step": 7000
},
{
"epoch": 0.005918468655420096,
"learning_rate": 8e-05,
"loss": 2.228125,
"step": 8000
},
{
"epoch": 0.006658277237347608,
"learning_rate": 9e-05,
"loss": 2.19816015625,
"step": 9000
},
{
"epoch": 0.00739808581927512,
"learning_rate": 0.0001,
"loss": 2.15448828125,
"step": 10000
},
{
"epoch": 0.008137894401202633,
"learning_rate": 9.98989898989899e-05,
"loss": 2.13649609375,
"step": 11000
},
{
"epoch": 0.008877702983130144,
"learning_rate": 9.97979797979798e-05,
"loss": 2.08796484375,
"step": 12000
},
{
"epoch": 0.009617511565057657,
"learning_rate": 9.96969696969697e-05,
"loss": 2.0968984375,
"step": 13000
},
{
"epoch": 0.010357320146985169,
"learning_rate": 9.95959595959596e-05,
"loss": 2.08955078125,
"step": 14000
},
{
"epoch": 0.011097128728912681,
"learning_rate": 9.94949494949495e-05,
"loss": 2.0763828125,
"step": 15000
},
{
"epoch": 0.011836937310840193,
"learning_rate": 9.939393939393939e-05,
"loss": 2.06798828125,
"step": 16000
},
{
"epoch": 0.012576745892767706,
"learning_rate": 9.92929292929293e-05,
"loss": 2.06260546875,
"step": 17000
},
{
"epoch": 0.013316554474695217,
"learning_rate": 9.919191919191919e-05,
"loss": 2.058328125,
"step": 18000
},
{
"epoch": 0.01405636305662273,
"learning_rate": 9.909090909090911e-05,
"loss": 2.04747265625,
"step": 19000
},
{
"epoch": 0.01479617163855024,
"learning_rate": 9.8989898989899e-05,
"loss": 2.0418671875,
"step": 20000
},
{
"epoch": 0.015535980220477754,
"learning_rate": 9.888888888888889e-05,
"loss": 2.0486484375,
"step": 21000
},
{
"epoch": 0.016275788802405267,
"learning_rate": 9.87878787878788e-05,
"loss": 2.052984375,
"step": 22000
},
{
"epoch": 0.017015597384332778,
"learning_rate": 9.868686868686869e-05,
"loss": 2.0374609375,
"step": 23000
},
{
"epoch": 0.01775540596626029,
"learning_rate": 9.85858585858586e-05,
"loss": 2.03405078125,
"step": 24000
},
{
"epoch": 0.018495214548187804,
"learning_rate": 9.848484848484849e-05,
"loss": 2.04113671875,
"step": 25000
},
{
"epoch": 0.019235023130115315,
"learning_rate": 9.838383838383838e-05,
"loss": 2.02526953125,
"step": 26000
},
{
"epoch": 0.019974831712042826,
"learning_rate": 9.828282828282829e-05,
"loss": 2.0195234375,
"step": 27000
},
{
"epoch": 0.020714640293970337,
"learning_rate": 9.818181818181818e-05,
"loss": 2.0342734375,
"step": 28000
},
{
"epoch": 0.021454448875897852,
"learning_rate": 9.808080808080809e-05,
"loss": 2.0206328125,
"step": 29000
},
{
"epoch": 0.022194257457825363,
"learning_rate": 9.797979797979798e-05,
"loss": 2.0215859375,
"step": 30000
},
{
"epoch": 0.022934066039752874,
"learning_rate": 9.787878787878789e-05,
"loss": 2.0064453125,
"step": 31000
},
{
"epoch": 0.023673874621680385,
"learning_rate": 9.777777777777778e-05,
"loss": 2.0098984375,
"step": 32000
},
{
"epoch": 0.0244136832036079,
"learning_rate": 9.767676767676767e-05,
"loss": 2.0178046875,
"step": 33000
},
{
"epoch": 0.02515349178553541,
"learning_rate": 9.757575757575758e-05,
"loss": 2.01334375,
"step": 34000
},
{
"epoch": 0.025893300367462922,
"learning_rate": 9.747474747474747e-05,
"loss": 2.01625,
"step": 35000
},
{
"epoch": 0.026633108949390433,
"learning_rate": 9.737373737373738e-05,
"loss": 2.0108828125,
"step": 36000
},
{
"epoch": 0.027372917531317948,
"learning_rate": 9.727272727272728e-05,
"loss": 2.018734375,
"step": 37000
},
{
"epoch": 0.02811272611324546,
"learning_rate": 9.717171717171718e-05,
"loss": 2.003578125,
"step": 38000
},
{
"epoch": 0.02885253469517297,
"learning_rate": 9.707070707070708e-05,
"loss": 2.01503125,
"step": 39000
},
{
"epoch": 0.02959234327710048,
"learning_rate": 9.696969696969698e-05,
"loss": 2.016765625,
"step": 40000
},
{
"epoch": 0.030332151859027996,
"learning_rate": 9.686868686868688e-05,
"loss": 2.0055703125,
"step": 41000
},
{
"epoch": 0.031071960440955507,
"learning_rate": 9.676767676767677e-05,
"loss": 2.015375,
"step": 42000
},
{
"epoch": 0.03181176902288302,
"learning_rate": 9.666666666666667e-05,
"loss": 2.0087421875,
"step": 43000
},
{
"epoch": 0.03255157760481053,
"learning_rate": 9.656565656565657e-05,
"loss": 2.016359375,
"step": 44000
},
{
"epoch": 0.03329138618673804,
"learning_rate": 9.646464646464647e-05,
"loss": 2.015015625,
"step": 45000
},
{
"epoch": 0.034031194768665556,
"learning_rate": 9.636363636363637e-05,
"loss": 2.025421875,
"step": 46000
},
{
"epoch": 0.03477100335059307,
"learning_rate": 9.626262626262627e-05,
"loss": 2.0208671875,
"step": 47000
},
{
"epoch": 0.03551081193252058,
"learning_rate": 9.616161616161616e-05,
"loss": 2.00615625,
"step": 48000
},
{
"epoch": 0.03625062051444809,
"learning_rate": 9.606060606060606e-05,
"loss": 2.00828125,
"step": 49000
},
{
"epoch": 0.03699042909637561,
"learning_rate": 9.595959595959596e-05,
"loss": 1.998234375,
"step": 50000
},
{
"epoch": 0.037730237678303115,
"learning_rate": 9.585858585858586e-05,
"loss": 2.0086640625,
"step": 51000
},
{
"epoch": 0.03847004626023063,
"learning_rate": 9.575757575757576e-05,
"loss": 1.9955234375,
"step": 52000
},
{
"epoch": 0.03920985484215814,
"learning_rate": 9.565656565656566e-05,
"loss": 2.0098984375,
"step": 53000
},
{
"epoch": 0.03994966342408565,
"learning_rate": 9.555555555555557e-05,
"loss": 1.9921015625,
"step": 54000
},
{
"epoch": 0.040689472006013166,
"learning_rate": 9.545454545454546e-05,
"loss": 2.0024765625,
"step": 55000
},
{
"epoch": 0.041429280587940674,
"learning_rate": 9.535353535353537e-05,
"loss": 2.0018515625,
"step": 56000
},
{
"epoch": 0.04216908916986819,
"learning_rate": 9.525252525252526e-05,
"loss": 2.009984375,
"step": 57000
},
{
"epoch": 0.042908897751795703,
"learning_rate": 9.515151515151515e-05,
"loss": 2.00109375,
"step": 58000
},
{
"epoch": 0.04364870633372321,
"learning_rate": 9.505050505050506e-05,
"loss": 2.017796875,
"step": 59000
},
{
"epoch": 0.044388514915650726,
"learning_rate": 9.494949494949495e-05,
"loss": 2.005921875,
"step": 60000
},
{
"epoch": 0.045128323497578234,
"learning_rate": 9.484848484848486e-05,
"loss": 2.012265625,
"step": 61000
},
{
"epoch": 0.04586813207950575,
"learning_rate": 9.474747474747475e-05,
"loss": 1.99825,
"step": 62000
},
{
"epoch": 0.04660794066143326,
"learning_rate": 9.464646464646464e-05,
"loss": 2.003953125,
"step": 63000
},
{
"epoch": 0.04734774924336077,
"learning_rate": 9.454545454545455e-05,
"loss": 2.019515625,
"step": 64000
},
{
"epoch": 0.048087557825288285,
"learning_rate": 9.444444444444444e-05,
"loss": 2.00675,
"step": 65000
},
{
"epoch": 0.0488273664072158,
"learning_rate": 9.434343434343435e-05,
"loss": 2.005234375,
"step": 66000
},
{
"epoch": 0.04956717498914331,
"learning_rate": 9.424242424242424e-05,
"loss": 1.9940625,
"step": 67000
},
{
"epoch": 0.05030698357107082,
"learning_rate": 9.414141414141415e-05,
"loss": 1.999109375,
"step": 68000
},
{
"epoch": 0.05104679215299833,
"learning_rate": 9.404040404040404e-05,
"loss": 2.00134375,
"step": 69000
},
{
"epoch": 0.051786600734925844,
"learning_rate": 9.393939393939395e-05,
"loss": 2.007953125,
"step": 70000
},
{
"epoch": 0.05252640931685336,
"learning_rate": 9.383838383838385e-05,
"loss": 2.005171875,
"step": 71000
},
{
"epoch": 0.05326621789878087,
"learning_rate": 9.373737373737375e-05,
"loss": 2.000734375,
"step": 72000
},
{
"epoch": 0.05400602648070838,
"learning_rate": 9.363636363636364e-05,
"loss": 2.01009375,
"step": 73000
},
{
"epoch": 0.054745835062635896,
"learning_rate": 9.353535353535354e-05,
"loss": 2.006703125,
"step": 74000
},
{
"epoch": 0.055485643644563404,
"learning_rate": 9.343434343434344e-05,
"loss": 2.019640625,
"step": 75000
},
{
"epoch": 0.05622545222649092,
"learning_rate": 9.333333333333334e-05,
"loss": 1.999078125,
"step": 76000
},
{
"epoch": 0.05696526080841843,
"learning_rate": 9.323232323232324e-05,
"loss": 1.995890625,
"step": 77000
},
{
"epoch": 0.05770506939034594,
"learning_rate": 9.313131313131314e-05,
"loss": 2.00525,
"step": 78000
},
{
"epoch": 0.058444877972273455,
"learning_rate": 9.303030303030303e-05,
"loss": 2.0128125,
"step": 79000
},
{
"epoch": 0.05918468655420096,
"learning_rate": 9.292929292929293e-05,
"loss": 2.006140625,
"step": 80000
},
{
"epoch": 0.05992449513612848,
"learning_rate": 9.282828282828283e-05,
"loss": 2.0091875,
"step": 81000
},
{
"epoch": 0.06066430371805599,
"learning_rate": 9.272727272727273e-05,
"loss": 1.997203125,
"step": 82000
},
{
"epoch": 0.0614041122999835,
"learning_rate": 9.262626262626263e-05,
"loss": 2.020015625,
"step": 83000
},
{
"epoch": 0.062143920881911015,
"learning_rate": 9.252525252525253e-05,
"loss": 2.028375,
"step": 84000
},
{
"epoch": 0.06288372946383852,
"learning_rate": 9.242424242424242e-05,
"loss": 2.006765625,
"step": 85000
},
{
"epoch": 0.06362353804576604,
"learning_rate": 9.232323232323232e-05,
"loss": 2.016703125,
"step": 86000
},
{
"epoch": 0.06436334662769355,
"learning_rate": 9.222222222222223e-05,
"loss": 2.041,
"step": 87000
},
{
"epoch": 0.06510315520962107,
"learning_rate": 9.212121212121214e-05,
"loss": 2.029640625,
"step": 88000
},
{
"epoch": 0.06584296379154858,
"learning_rate": 9.202020202020203e-05,
"loss": 2.005890625,
"step": 89000
},
{
"epoch": 0.06658277237347608,
"learning_rate": 9.191919191919192e-05,
"loss": 1.997921875,
"step": 90000
},
{
"epoch": 0.0673225809554036,
"learning_rate": 9.181818181818183e-05,
"loss": 1.993796875,
"step": 91000
},
{
"epoch": 0.06806238953733111,
"learning_rate": 9.171717171717172e-05,
"loss": 1.997109375,
"step": 92000
},
{
"epoch": 0.06880219811925863,
"learning_rate": 9.161616161616163e-05,
"loss": 2.004078125,
"step": 93000
},
{
"epoch": 0.06954200670118614,
"learning_rate": 9.151515151515152e-05,
"loss": 2.001828125,
"step": 94000
},
{
"epoch": 0.07028181528311364,
"learning_rate": 9.141414141414141e-05,
"loss": 1.9931875,
"step": 95000
},
{
"epoch": 0.07102162386504116,
"learning_rate": 9.131313131313132e-05,
"loss": 1.987171875,
"step": 96000
},
{
"epoch": 0.07176143244696867,
"learning_rate": 9.121212121212121e-05,
"loss": 2.0305,
"step": 97000
},
{
"epoch": 0.07250124102889619,
"learning_rate": 9.111111111111112e-05,
"loss": 1.999484375,
"step": 98000
},
{
"epoch": 0.0732410496108237,
"learning_rate": 9.101010101010101e-05,
"loss": 1.989171875,
"step": 99000
},
{
"epoch": 0.07398085819275121,
"learning_rate": 9.090909090909092e-05,
"loss": 1.988671875,
"step": 100000
},
{
"epoch": 0.07472066677467872,
"learning_rate": 9.080808080808081e-05,
"loss": 1.989,
"step": 101000
},
{
"epoch": 0.07546047535660623,
"learning_rate": 9.07070707070707e-05,
"loss": 1.99684375,
"step": 102000
},
{
"epoch": 0.07620028393853374,
"learning_rate": 9.060606060606061e-05,
"loss": 2.004921875,
"step": 103000
},
{
"epoch": 0.07694009252046126,
"learning_rate": 9.050505050505052e-05,
"loss": 2.01390625,
"step": 104000
},
{
"epoch": 0.07767990110238877,
"learning_rate": 9.040404040404041e-05,
"loss": 1.98865625,
"step": 105000
},
{
"epoch": 0.07841970968431627,
"learning_rate": 9.030303030303031e-05,
"loss": 1.98871875,
"step": 106000
},
{
"epoch": 0.07915951826624379,
"learning_rate": 9.02020202020202e-05,
"loss": 1.994046875,
"step": 107000
},
{
"epoch": 0.0798993268481713,
"learning_rate": 9.010101010101011e-05,
"loss": 2.010546875,
"step": 108000
},
{
"epoch": 0.08063913543009882,
"learning_rate": 9e-05,
"loss": 2.005640625,
"step": 109000
},
{
"epoch": 0.08137894401202633,
"learning_rate": 8.98989898989899e-05,
"loss": 1.99709375,
"step": 110000
},
{
"epoch": 0.08211875259395383,
"learning_rate": 8.97979797979798e-05,
"loss": 2.01225,
"step": 111000
},
{
"epoch": 0.08285856117588135,
"learning_rate": 8.96969696969697e-05,
"loss": 1.982421875,
"step": 112000
},
{
"epoch": 0.08359836975780886,
"learning_rate": 8.95959595959596e-05,
"loss": 1.988484375,
"step": 113000
},
{
"epoch": 0.08433817833973638,
"learning_rate": 8.94949494949495e-05,
"loss": 2.001359375,
"step": 114000
},
{
"epoch": 0.08507798692166389,
"learning_rate": 8.93939393939394e-05,
"loss": 2.001796875,
"step": 115000
},
{
"epoch": 0.08581779550359141,
"learning_rate": 8.92929292929293e-05,
"loss": 2.012875,
"step": 116000
},
{
"epoch": 0.08655760408551891,
"learning_rate": 8.919191919191919e-05,
"loss": 2.007953125,
"step": 117000
},
{
"epoch": 0.08729741266744642,
"learning_rate": 8.90909090909091e-05,
"loss": 2.00078125,
"step": 118000
},
{
"epoch": 0.08803722124937394,
"learning_rate": 8.898989898989899e-05,
"loss": 2.02653125,
"step": 119000
},
{
"epoch": 0.08877702983130145,
"learning_rate": 8.888888888888889e-05,
"loss": 2.006796875,
"step": 120000
},
{
"epoch": 0.08951683841322897,
"learning_rate": 8.87878787878788e-05,
"loss": 2.007296875,
"step": 121000
},
{
"epoch": 0.09025664699515647,
"learning_rate": 8.868686868686869e-05,
"loss": 2.012890625,
"step": 122000
},
{
"epoch": 0.09099645557708398,
"learning_rate": 8.85858585858586e-05,
"loss": 2.012546875,
"step": 123000
},
{
"epoch": 0.0917362641590115,
"learning_rate": 8.848484848484849e-05,
"loss": 2.016765625,
"step": 124000
},
{
"epoch": 0.09247607274093901,
"learning_rate": 8.83838383838384e-05,
"loss": 1.985875,
"step": 125000
},
{
"epoch": 0.09321588132286653,
"learning_rate": 8.828282828282829e-05,
"loss": 2.0315,
"step": 126000
},
{
"epoch": 0.09395568990479404,
"learning_rate": 8.818181818181818e-05,
"loss": 2.061625,
"step": 127000
},
{
"epoch": 0.09469549848672154,
"learning_rate": 8.808080808080809e-05,
"loss": 2.03778125,
"step": 128000
},
{
"epoch": 0.09543530706864906,
"learning_rate": 8.797979797979798e-05,
"loss": 2.0075625,
"step": 129000
},
{
"epoch": 0.09617511565057657,
"learning_rate": 8.787878787878789e-05,
"loss": 1.9984375,
"step": 130000
},
{
"epoch": 0.09691492423250408,
"learning_rate": 8.777777777777778e-05,
"loss": 1.99621875,
"step": 131000
},
{
"epoch": 0.0976547328144316,
"learning_rate": 8.767676767676767e-05,
"loss": 2.01059375,
"step": 132000
},
{
"epoch": 0.0983945413963591,
"learning_rate": 8.757575757575758e-05,
"loss": 2.0471875,
"step": 133000
},
{
"epoch": 0.09913434997828661,
"learning_rate": 8.747474747474747e-05,
"loss": 2.01190625,
"step": 134000
},
{
"epoch": 0.09987415856021413,
"learning_rate": 8.737373737373738e-05,
"loss": 2.00653125,
"step": 135000
},
{
"epoch": 0.10061396714214164,
"learning_rate": 8.727272727272727e-05,
"loss": 2.014375,
"step": 136000
},
{
"epoch": 0.10135377572406916,
"learning_rate": 8.717171717171718e-05,
"loss": 2.00065625,
"step": 137000
},
{
"epoch": 0.10209358430599666,
"learning_rate": 8.707070707070707e-05,
"loss": 2.031875,
"step": 138000
},
{
"epoch": 0.10283339288792417,
"learning_rate": 8.696969696969698e-05,
"loss": 1.99303125,
"step": 139000
},
{
"epoch": 0.10357320146985169,
"learning_rate": 8.686868686868688e-05,
"loss": 1.97703125,
"step": 140000
},
{
"epoch": 0.1043130100517792,
"learning_rate": 8.676767676767678e-05,
"loss": 1.99440625,
"step": 141000
},
{
"epoch": 0.10505281863370672,
"learning_rate": 8.666666666666667e-05,
"loss": 2.00790625,
"step": 142000
},
{
"epoch": 0.10579262721563423,
"learning_rate": 8.656565656565657e-05,
"loss": 1.9769375,
"step": 143000
},
{
"epoch": 0.10653243579756173,
"learning_rate": 8.646464646464647e-05,
"loss": 1.98775,
"step": 144000
},
{
"epoch": 0.10727224437948925,
"learning_rate": 8.636363636363637e-05,
"loss": 1.970875,
"step": 145000
},
{
"epoch": 0.10801205296141676,
"learning_rate": 8.626262626262627e-05,
"loss": 1.97203125,
"step": 146000
},
{
"epoch": 0.10875186154334428,
"learning_rate": 8.616161616161616e-05,
"loss": 1.97359375,
"step": 147000
},
{
"epoch": 0.10949167012527179,
"learning_rate": 8.606060606060606e-05,
"loss": 1.97471875,
"step": 148000
},
{
"epoch": 0.11023147870719929,
"learning_rate": 8.595959595959596e-05,
"loss": 1.96978125,
"step": 149000
},
{
"epoch": 0.11097128728912681,
"learning_rate": 8.585858585858586e-05,
"loss": 1.9813125,
"step": 150000
},
{
"epoch": 0.11171109587105432,
"learning_rate": 8.575757575757576e-05,
"loss": 1.9761875,
"step": 151000
},
{
"epoch": 0.11245090445298184,
"learning_rate": 8.565656565656566e-05,
"loss": 1.9814375,
"step": 152000
},
{
"epoch": 0.11319071303490935,
"learning_rate": 8.555555555555556e-05,
"loss": 1.97459375,
"step": 153000
},
{
"epoch": 0.11393052161683687,
"learning_rate": 8.545454545454545e-05,
"loss": 1.97503125,
"step": 154000
},
{
"epoch": 0.11467033019876437,
"learning_rate": 8.535353535353535e-05,
"loss": 1.98446875,
"step": 155000
},
{
"epoch": 0.11541013878069188,
"learning_rate": 8.525252525252526e-05,
"loss": 1.997,
"step": 156000
},
{
"epoch": 0.1161499473626194,
"learning_rate": 8.515151515151515e-05,
"loss": 1.98559375,
"step": 157000
},
{
"epoch": 0.11688975594454691,
"learning_rate": 8.505050505050506e-05,
"loss": 1.99725,
"step": 158000
},
{
"epoch": 0.11762956452647443,
"learning_rate": 8.494949494949495e-05,
"loss": 1.997375,
"step": 159000
},
{
"epoch": 0.11836937310840193,
"learning_rate": 8.484848484848486e-05,
"loss": 1.98828125,
"step": 160000
},
{
"epoch": 0.11910918169032944,
"learning_rate": 8.474747474747475e-05,
"loss": 1.9948125,
"step": 161000
},
{
"epoch": 0.11984899027225696,
"learning_rate": 8.464646464646466e-05,
"loss": 1.986,
"step": 162000
},
{
"epoch": 0.12058879885418447,
"learning_rate": 8.454545454545455e-05,
"loss": 1.99940625,
"step": 163000
},
{
"epoch": 0.12132860743611198,
"learning_rate": 8.444444444444444e-05,
"loss": 1.99140625,
"step": 164000
},
{
"epoch": 0.12206841601803949,
"learning_rate": 8.434343434343435e-05,
"loss": 1.996375,
"step": 165000
},
{
"epoch": 0.122808224599967,
"learning_rate": 8.424242424242424e-05,
"loss": 1.99515625,
"step": 166000
},
{
"epoch": 0.12354803318189451,
"learning_rate": 8.414141414141415e-05,
"loss": 1.97484375,
"step": 167000
},
{
"epoch": 0.12428784176382203,
"learning_rate": 8.404040404040404e-05,
"loss": 1.99240625,
"step": 168000
},
{
"epoch": 0.12502765034574953,
"learning_rate": 8.393939393939393e-05,
"loss": 1.99184375,
"step": 169000
},
{
"epoch": 0.12576745892767704,
"learning_rate": 8.383838383838384e-05,
"loss": 1.99071875,
"step": 170000
},
{
"epoch": 0.12650726750960456,
"learning_rate": 8.373737373737373e-05,
"loss": 1.99640625,
"step": 171000
},
{
"epoch": 0.12724707609153207,
"learning_rate": 8.363636363636364e-05,
"loss": 2.009375,
"step": 172000
},
{
"epoch": 0.1279868846734596,
"learning_rate": 8.353535353535355e-05,
"loss": 2.01953125,
"step": 173000
},
{
"epoch": 0.1287266932553871,
"learning_rate": 8.343434343434344e-05,
"loss": 2.0111875,
"step": 174000
},
{
"epoch": 0.12946650183731462,
"learning_rate": 8.333333333333334e-05,
"loss": 2.014,
"step": 175000
},
{
"epoch": 0.13020631041924213,
"learning_rate": 8.323232323232324e-05,
"loss": 2.0209375,
"step": 176000
},
{
"epoch": 0.13094611900116965,
"learning_rate": 8.313131313131314e-05,
"loss": 1.98971875,
"step": 177000
},
{
"epoch": 0.13168592758309716,
"learning_rate": 8.303030303030304e-05,
"loss": 1.978,
"step": 178000
},
{
"epoch": 0.13242573616502465,
"learning_rate": 8.292929292929293e-05,
"loss": 2.0005,
"step": 179000
},
{
"epoch": 0.13316554474695216,
"learning_rate": 8.282828282828283e-05,
"loss": 2.01215625,
"step": 180000
},
{
"epoch": 0.13390535332887968,
"learning_rate": 8.272727272727273e-05,
"loss": 2.0359375,
"step": 181000
},
{
"epoch": 0.1346451619108072,
"learning_rate": 8.262626262626263e-05,
"loss": 2.0521875,
"step": 182000
},
{
"epoch": 0.1353849704927347,
"learning_rate": 8.252525252525253e-05,
"loss": 2.0225625,
"step": 183000
},
{
"epoch": 0.13612477907466222,
"learning_rate": 8.242424242424243e-05,
"loss": 2.02978125,
"step": 184000
},
{
"epoch": 0.13686458765658974,
"learning_rate": 8.232323232323233e-05,
"loss": 2.09690625,
"step": 185000
},
{
"epoch": 0.13760439623851725,
"learning_rate": 8.222222222222222e-05,
"loss": 2.14709375,
"step": 186000
},
{
"epoch": 0.13834420482044477,
"learning_rate": 8.212121212121212e-05,
"loss": 2.05809375,
"step": 187000
},
{
"epoch": 0.13908401340237228,
"learning_rate": 8.202020202020202e-05,
"loss": 2.24625,
"step": 188000
},
{
"epoch": 0.1398238219842998,
"learning_rate": 8.191919191919192e-05,
"loss": 2.2840625,
"step": 189000
},
{
"epoch": 0.14056363056622728,
"learning_rate": 8.181818181818183e-05,
"loss": 2.23384375,
"step": 190000
},
{
"epoch": 0.1413034391481548,
"learning_rate": 8.171717171717172e-05,
"loss": 2.27740625,
"step": 191000
},
{
"epoch": 0.1420432477300823,
"learning_rate": 8.161616161616163e-05,
"loss": 2.3976875,
"step": 192000
},
{
"epoch": 0.14278305631200983,
"learning_rate": 8.151515151515152e-05,
"loss": 2.22225,
"step": 193000
},
{
"epoch": 0.14352286489393734,
"learning_rate": 8.141414141414141e-05,
"loss": 2.28578125,
"step": 194000
},
{
"epoch": 0.14426267347586486,
"learning_rate": 8.131313131313132e-05,
"loss": 2.22359375,
"step": 195000
},
{
"epoch": 0.14500248205779237,
"learning_rate": 8.121212121212121e-05,
"loss": 2.16703125,
"step": 196000
},
{
"epoch": 0.14574229063971988,
"learning_rate": 8.111111111111112e-05,
"loss": 2.26603125,
"step": 197000
},
{
"epoch": 0.1464820992216474,
"learning_rate": 8.101010101010101e-05,
"loss": 2.159125,
"step": 198000
},
{
"epoch": 0.14722190780357491,
"learning_rate": 8.090909090909092e-05,
"loss": 2.02203125,
"step": 199000
},
{
"epoch": 0.14796171638550243,
"learning_rate": 8.080808080808081e-05,
"loss": 2.01090625,
"step": 200000
},
{
"epoch": 0.14870152496742992,
"learning_rate": 8.07070707070707e-05,
"loss": 2.0288125,
"step": 201000
},
{
"epoch": 0.14944133354935743,
"learning_rate": 8.060606060606061e-05,
"loss": 2.00403125,
"step": 202000
},
{
"epoch": 0.15018114213128494,
"learning_rate": 8.05050505050505e-05,
"loss": 1.99765625,
"step": 203000
},
{
"epoch": 0.15092095071321246,
"learning_rate": 8.040404040404041e-05,
"loss": 1.98784375,
"step": 204000
}
],
"max_steps": 1000000,
"num_train_epochs": 1,
"total_flos": 369792323308800000
}