|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.15092095071321246, |
|
"global_step": 204000, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.000739808581927512, |
|
"learning_rate": 1e-05, |
|
"loss": 9.4473583984375, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.001479617163855024, |
|
"learning_rate": 2e-05, |
|
"loss": 4.761673828125, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.002219425745782536, |
|
"learning_rate": 3e-05, |
|
"loss": 3.3295283203125, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.002959234327710048, |
|
"learning_rate": 4e-05, |
|
"loss": 2.793072265625, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.00369904290963756, |
|
"learning_rate": 5e-05, |
|
"loss": 2.51521875, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.004438851491565072, |
|
"learning_rate": 6e-05, |
|
"loss": 2.3833671875, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.005178660073492584, |
|
"learning_rate": 7e-05, |
|
"loss": 2.28459765625, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.005918468655420096, |
|
"learning_rate": 8e-05, |
|
"loss": 2.228125, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.006658277237347608, |
|
"learning_rate": 9e-05, |
|
"loss": 2.19816015625, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.00739808581927512, |
|
"learning_rate": 0.0001, |
|
"loss": 2.15448828125, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.008137894401202633, |
|
"learning_rate": 9.98989898989899e-05, |
|
"loss": 2.13649609375, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.008877702983130144, |
|
"learning_rate": 9.97979797979798e-05, |
|
"loss": 2.08796484375, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.009617511565057657, |
|
"learning_rate": 9.96969696969697e-05, |
|
"loss": 2.0968984375, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 0.010357320146985169, |
|
"learning_rate": 9.95959595959596e-05, |
|
"loss": 2.08955078125, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 0.011097128728912681, |
|
"learning_rate": 9.94949494949495e-05, |
|
"loss": 2.0763828125, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 0.011836937310840193, |
|
"learning_rate": 9.939393939393939e-05, |
|
"loss": 2.06798828125, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 0.012576745892767706, |
|
"learning_rate": 9.92929292929293e-05, |
|
"loss": 2.06260546875, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 0.013316554474695217, |
|
"learning_rate": 9.919191919191919e-05, |
|
"loss": 2.058328125, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 0.01405636305662273, |
|
"learning_rate": 9.909090909090911e-05, |
|
"loss": 2.04747265625, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 0.01479617163855024, |
|
"learning_rate": 9.8989898989899e-05, |
|
"loss": 2.0418671875, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 0.015535980220477754, |
|
"learning_rate": 9.888888888888889e-05, |
|
"loss": 2.0486484375, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 0.016275788802405267, |
|
"learning_rate": 9.87878787878788e-05, |
|
"loss": 2.052984375, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 0.017015597384332778, |
|
"learning_rate": 9.868686868686869e-05, |
|
"loss": 2.0374609375, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 0.01775540596626029, |
|
"learning_rate": 9.85858585858586e-05, |
|
"loss": 2.03405078125, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 0.018495214548187804, |
|
"learning_rate": 9.848484848484849e-05, |
|
"loss": 2.04113671875, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 0.019235023130115315, |
|
"learning_rate": 9.838383838383838e-05, |
|
"loss": 2.02526953125, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 0.019974831712042826, |
|
"learning_rate": 9.828282828282829e-05, |
|
"loss": 2.0195234375, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 0.020714640293970337, |
|
"learning_rate": 9.818181818181818e-05, |
|
"loss": 2.0342734375, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 0.021454448875897852, |
|
"learning_rate": 9.808080808080809e-05, |
|
"loss": 2.0206328125, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 0.022194257457825363, |
|
"learning_rate": 9.797979797979798e-05, |
|
"loss": 2.0215859375, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 0.022934066039752874, |
|
"learning_rate": 9.787878787878789e-05, |
|
"loss": 2.0064453125, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 0.023673874621680385, |
|
"learning_rate": 9.777777777777778e-05, |
|
"loss": 2.0098984375, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 0.0244136832036079, |
|
"learning_rate": 9.767676767676767e-05, |
|
"loss": 2.0178046875, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 0.02515349178553541, |
|
"learning_rate": 9.757575757575758e-05, |
|
"loss": 2.01334375, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 0.025893300367462922, |
|
"learning_rate": 9.747474747474747e-05, |
|
"loss": 2.01625, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 0.026633108949390433, |
|
"learning_rate": 9.737373737373738e-05, |
|
"loss": 2.0108828125, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 0.027372917531317948, |
|
"learning_rate": 9.727272727272728e-05, |
|
"loss": 2.018734375, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 0.02811272611324546, |
|
"learning_rate": 9.717171717171718e-05, |
|
"loss": 2.003578125, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 0.02885253469517297, |
|
"learning_rate": 9.707070707070708e-05, |
|
"loss": 2.01503125, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 0.02959234327710048, |
|
"learning_rate": 9.696969696969698e-05, |
|
"loss": 2.016765625, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 0.030332151859027996, |
|
"learning_rate": 9.686868686868688e-05, |
|
"loss": 2.0055703125, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 0.031071960440955507, |
|
"learning_rate": 9.676767676767677e-05, |
|
"loss": 2.015375, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 0.03181176902288302, |
|
"learning_rate": 9.666666666666667e-05, |
|
"loss": 2.0087421875, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 0.03255157760481053, |
|
"learning_rate": 9.656565656565657e-05, |
|
"loss": 2.016359375, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 0.03329138618673804, |
|
"learning_rate": 9.646464646464647e-05, |
|
"loss": 2.015015625, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 0.034031194768665556, |
|
"learning_rate": 9.636363636363637e-05, |
|
"loss": 2.025421875, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 0.03477100335059307, |
|
"learning_rate": 9.626262626262627e-05, |
|
"loss": 2.0208671875, |
|
"step": 47000 |
|
}, |
|
{ |
|
"epoch": 0.03551081193252058, |
|
"learning_rate": 9.616161616161616e-05, |
|
"loss": 2.00615625, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 0.03625062051444809, |
|
"learning_rate": 9.606060606060606e-05, |
|
"loss": 2.00828125, |
|
"step": 49000 |
|
}, |
|
{ |
|
"epoch": 0.03699042909637561, |
|
"learning_rate": 9.595959595959596e-05, |
|
"loss": 1.998234375, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 0.037730237678303115, |
|
"learning_rate": 9.585858585858586e-05, |
|
"loss": 2.0086640625, |
|
"step": 51000 |
|
}, |
|
{ |
|
"epoch": 0.03847004626023063, |
|
"learning_rate": 9.575757575757576e-05, |
|
"loss": 1.9955234375, |
|
"step": 52000 |
|
}, |
|
{ |
|
"epoch": 0.03920985484215814, |
|
"learning_rate": 9.565656565656566e-05, |
|
"loss": 2.0098984375, |
|
"step": 53000 |
|
}, |
|
{ |
|
"epoch": 0.03994966342408565, |
|
"learning_rate": 9.555555555555557e-05, |
|
"loss": 1.9921015625, |
|
"step": 54000 |
|
}, |
|
{ |
|
"epoch": 0.040689472006013166, |
|
"learning_rate": 9.545454545454546e-05, |
|
"loss": 2.0024765625, |
|
"step": 55000 |
|
}, |
|
{ |
|
"epoch": 0.041429280587940674, |
|
"learning_rate": 9.535353535353537e-05, |
|
"loss": 2.0018515625, |
|
"step": 56000 |
|
}, |
|
{ |
|
"epoch": 0.04216908916986819, |
|
"learning_rate": 9.525252525252526e-05, |
|
"loss": 2.009984375, |
|
"step": 57000 |
|
}, |
|
{ |
|
"epoch": 0.042908897751795703, |
|
"learning_rate": 9.515151515151515e-05, |
|
"loss": 2.00109375, |
|
"step": 58000 |
|
}, |
|
{ |
|
"epoch": 0.04364870633372321, |
|
"learning_rate": 9.505050505050506e-05, |
|
"loss": 2.017796875, |
|
"step": 59000 |
|
}, |
|
{ |
|
"epoch": 0.044388514915650726, |
|
"learning_rate": 9.494949494949495e-05, |
|
"loss": 2.005921875, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 0.045128323497578234, |
|
"learning_rate": 9.484848484848486e-05, |
|
"loss": 2.012265625, |
|
"step": 61000 |
|
}, |
|
{ |
|
"epoch": 0.04586813207950575, |
|
"learning_rate": 9.474747474747475e-05, |
|
"loss": 1.99825, |
|
"step": 62000 |
|
}, |
|
{ |
|
"epoch": 0.04660794066143326, |
|
"learning_rate": 9.464646464646464e-05, |
|
"loss": 2.003953125, |
|
"step": 63000 |
|
}, |
|
{ |
|
"epoch": 0.04734774924336077, |
|
"learning_rate": 9.454545454545455e-05, |
|
"loss": 2.019515625, |
|
"step": 64000 |
|
}, |
|
{ |
|
"epoch": 0.048087557825288285, |
|
"learning_rate": 9.444444444444444e-05, |
|
"loss": 2.00675, |
|
"step": 65000 |
|
}, |
|
{ |
|
"epoch": 0.0488273664072158, |
|
"learning_rate": 9.434343434343435e-05, |
|
"loss": 2.005234375, |
|
"step": 66000 |
|
}, |
|
{ |
|
"epoch": 0.04956717498914331, |
|
"learning_rate": 9.424242424242424e-05, |
|
"loss": 1.9940625, |
|
"step": 67000 |
|
}, |
|
{ |
|
"epoch": 0.05030698357107082, |
|
"learning_rate": 9.414141414141415e-05, |
|
"loss": 1.999109375, |
|
"step": 68000 |
|
}, |
|
{ |
|
"epoch": 0.05104679215299833, |
|
"learning_rate": 9.404040404040404e-05, |
|
"loss": 2.00134375, |
|
"step": 69000 |
|
}, |
|
{ |
|
"epoch": 0.051786600734925844, |
|
"learning_rate": 9.393939393939395e-05, |
|
"loss": 2.007953125, |
|
"step": 70000 |
|
}, |
|
{ |
|
"epoch": 0.05252640931685336, |
|
"learning_rate": 9.383838383838385e-05, |
|
"loss": 2.005171875, |
|
"step": 71000 |
|
}, |
|
{ |
|
"epoch": 0.05326621789878087, |
|
"learning_rate": 9.373737373737375e-05, |
|
"loss": 2.000734375, |
|
"step": 72000 |
|
}, |
|
{ |
|
"epoch": 0.05400602648070838, |
|
"learning_rate": 9.363636363636364e-05, |
|
"loss": 2.01009375, |
|
"step": 73000 |
|
}, |
|
{ |
|
"epoch": 0.054745835062635896, |
|
"learning_rate": 9.353535353535354e-05, |
|
"loss": 2.006703125, |
|
"step": 74000 |
|
}, |
|
{ |
|
"epoch": 0.055485643644563404, |
|
"learning_rate": 9.343434343434344e-05, |
|
"loss": 2.019640625, |
|
"step": 75000 |
|
}, |
|
{ |
|
"epoch": 0.05622545222649092, |
|
"learning_rate": 9.333333333333334e-05, |
|
"loss": 1.999078125, |
|
"step": 76000 |
|
}, |
|
{ |
|
"epoch": 0.05696526080841843, |
|
"learning_rate": 9.323232323232324e-05, |
|
"loss": 1.995890625, |
|
"step": 77000 |
|
}, |
|
{ |
|
"epoch": 0.05770506939034594, |
|
"learning_rate": 9.313131313131314e-05, |
|
"loss": 2.00525, |
|
"step": 78000 |
|
}, |
|
{ |
|
"epoch": 0.058444877972273455, |
|
"learning_rate": 9.303030303030303e-05, |
|
"loss": 2.0128125, |
|
"step": 79000 |
|
}, |
|
{ |
|
"epoch": 0.05918468655420096, |
|
"learning_rate": 9.292929292929293e-05, |
|
"loss": 2.006140625, |
|
"step": 80000 |
|
}, |
|
{ |
|
"epoch": 0.05992449513612848, |
|
"learning_rate": 9.282828282828283e-05, |
|
"loss": 2.0091875, |
|
"step": 81000 |
|
}, |
|
{ |
|
"epoch": 0.06066430371805599, |
|
"learning_rate": 9.272727272727273e-05, |
|
"loss": 1.997203125, |
|
"step": 82000 |
|
}, |
|
{ |
|
"epoch": 0.0614041122999835, |
|
"learning_rate": 9.262626262626263e-05, |
|
"loss": 2.020015625, |
|
"step": 83000 |
|
}, |
|
{ |
|
"epoch": 0.062143920881911015, |
|
"learning_rate": 9.252525252525253e-05, |
|
"loss": 2.028375, |
|
"step": 84000 |
|
}, |
|
{ |
|
"epoch": 0.06288372946383852, |
|
"learning_rate": 9.242424242424242e-05, |
|
"loss": 2.006765625, |
|
"step": 85000 |
|
}, |
|
{ |
|
"epoch": 0.06362353804576604, |
|
"learning_rate": 9.232323232323232e-05, |
|
"loss": 2.016703125, |
|
"step": 86000 |
|
}, |
|
{ |
|
"epoch": 0.06436334662769355, |
|
"learning_rate": 9.222222222222223e-05, |
|
"loss": 2.041, |
|
"step": 87000 |
|
}, |
|
{ |
|
"epoch": 0.06510315520962107, |
|
"learning_rate": 9.212121212121214e-05, |
|
"loss": 2.029640625, |
|
"step": 88000 |
|
}, |
|
{ |
|
"epoch": 0.06584296379154858, |
|
"learning_rate": 9.202020202020203e-05, |
|
"loss": 2.005890625, |
|
"step": 89000 |
|
}, |
|
{ |
|
"epoch": 0.06658277237347608, |
|
"learning_rate": 9.191919191919192e-05, |
|
"loss": 1.997921875, |
|
"step": 90000 |
|
}, |
|
{ |
|
"epoch": 0.0673225809554036, |
|
"learning_rate": 9.181818181818183e-05, |
|
"loss": 1.993796875, |
|
"step": 91000 |
|
}, |
|
{ |
|
"epoch": 0.06806238953733111, |
|
"learning_rate": 9.171717171717172e-05, |
|
"loss": 1.997109375, |
|
"step": 92000 |
|
}, |
|
{ |
|
"epoch": 0.06880219811925863, |
|
"learning_rate": 9.161616161616163e-05, |
|
"loss": 2.004078125, |
|
"step": 93000 |
|
}, |
|
{ |
|
"epoch": 0.06954200670118614, |
|
"learning_rate": 9.151515151515152e-05, |
|
"loss": 2.001828125, |
|
"step": 94000 |
|
}, |
|
{ |
|
"epoch": 0.07028181528311364, |
|
"learning_rate": 9.141414141414141e-05, |
|
"loss": 1.9931875, |
|
"step": 95000 |
|
}, |
|
{ |
|
"epoch": 0.07102162386504116, |
|
"learning_rate": 9.131313131313132e-05, |
|
"loss": 1.987171875, |
|
"step": 96000 |
|
}, |
|
{ |
|
"epoch": 0.07176143244696867, |
|
"learning_rate": 9.121212121212121e-05, |
|
"loss": 2.0305, |
|
"step": 97000 |
|
}, |
|
{ |
|
"epoch": 0.07250124102889619, |
|
"learning_rate": 9.111111111111112e-05, |
|
"loss": 1.999484375, |
|
"step": 98000 |
|
}, |
|
{ |
|
"epoch": 0.0732410496108237, |
|
"learning_rate": 9.101010101010101e-05, |
|
"loss": 1.989171875, |
|
"step": 99000 |
|
}, |
|
{ |
|
"epoch": 0.07398085819275121, |
|
"learning_rate": 9.090909090909092e-05, |
|
"loss": 1.988671875, |
|
"step": 100000 |
|
}, |
|
{ |
|
"epoch": 0.07472066677467872, |
|
"learning_rate": 9.080808080808081e-05, |
|
"loss": 1.989, |
|
"step": 101000 |
|
}, |
|
{ |
|
"epoch": 0.07546047535660623, |
|
"learning_rate": 9.07070707070707e-05, |
|
"loss": 1.99684375, |
|
"step": 102000 |
|
}, |
|
{ |
|
"epoch": 0.07620028393853374, |
|
"learning_rate": 9.060606060606061e-05, |
|
"loss": 2.004921875, |
|
"step": 103000 |
|
}, |
|
{ |
|
"epoch": 0.07694009252046126, |
|
"learning_rate": 9.050505050505052e-05, |
|
"loss": 2.01390625, |
|
"step": 104000 |
|
}, |
|
{ |
|
"epoch": 0.07767990110238877, |
|
"learning_rate": 9.040404040404041e-05, |
|
"loss": 1.98865625, |
|
"step": 105000 |
|
}, |
|
{ |
|
"epoch": 0.07841970968431627, |
|
"learning_rate": 9.030303030303031e-05, |
|
"loss": 1.98871875, |
|
"step": 106000 |
|
}, |
|
{ |
|
"epoch": 0.07915951826624379, |
|
"learning_rate": 9.02020202020202e-05, |
|
"loss": 1.994046875, |
|
"step": 107000 |
|
}, |
|
{ |
|
"epoch": 0.0798993268481713, |
|
"learning_rate": 9.010101010101011e-05, |
|
"loss": 2.010546875, |
|
"step": 108000 |
|
}, |
|
{ |
|
"epoch": 0.08063913543009882, |
|
"learning_rate": 9e-05, |
|
"loss": 2.005640625, |
|
"step": 109000 |
|
}, |
|
{ |
|
"epoch": 0.08137894401202633, |
|
"learning_rate": 8.98989898989899e-05, |
|
"loss": 1.99709375, |
|
"step": 110000 |
|
}, |
|
{ |
|
"epoch": 0.08211875259395383, |
|
"learning_rate": 8.97979797979798e-05, |
|
"loss": 2.01225, |
|
"step": 111000 |
|
}, |
|
{ |
|
"epoch": 0.08285856117588135, |
|
"learning_rate": 8.96969696969697e-05, |
|
"loss": 1.982421875, |
|
"step": 112000 |
|
}, |
|
{ |
|
"epoch": 0.08359836975780886, |
|
"learning_rate": 8.95959595959596e-05, |
|
"loss": 1.988484375, |
|
"step": 113000 |
|
}, |
|
{ |
|
"epoch": 0.08433817833973638, |
|
"learning_rate": 8.94949494949495e-05, |
|
"loss": 2.001359375, |
|
"step": 114000 |
|
}, |
|
{ |
|
"epoch": 0.08507798692166389, |
|
"learning_rate": 8.93939393939394e-05, |
|
"loss": 2.001796875, |
|
"step": 115000 |
|
}, |
|
{ |
|
"epoch": 0.08581779550359141, |
|
"learning_rate": 8.92929292929293e-05, |
|
"loss": 2.012875, |
|
"step": 116000 |
|
}, |
|
{ |
|
"epoch": 0.08655760408551891, |
|
"learning_rate": 8.919191919191919e-05, |
|
"loss": 2.007953125, |
|
"step": 117000 |
|
}, |
|
{ |
|
"epoch": 0.08729741266744642, |
|
"learning_rate": 8.90909090909091e-05, |
|
"loss": 2.00078125, |
|
"step": 118000 |
|
}, |
|
{ |
|
"epoch": 0.08803722124937394, |
|
"learning_rate": 8.898989898989899e-05, |
|
"loss": 2.02653125, |
|
"step": 119000 |
|
}, |
|
{ |
|
"epoch": 0.08877702983130145, |
|
"learning_rate": 8.888888888888889e-05, |
|
"loss": 2.006796875, |
|
"step": 120000 |
|
}, |
|
{ |
|
"epoch": 0.08951683841322897, |
|
"learning_rate": 8.87878787878788e-05, |
|
"loss": 2.007296875, |
|
"step": 121000 |
|
}, |
|
{ |
|
"epoch": 0.09025664699515647, |
|
"learning_rate": 8.868686868686869e-05, |
|
"loss": 2.012890625, |
|
"step": 122000 |
|
}, |
|
{ |
|
"epoch": 0.09099645557708398, |
|
"learning_rate": 8.85858585858586e-05, |
|
"loss": 2.012546875, |
|
"step": 123000 |
|
}, |
|
{ |
|
"epoch": 0.0917362641590115, |
|
"learning_rate": 8.848484848484849e-05, |
|
"loss": 2.016765625, |
|
"step": 124000 |
|
}, |
|
{ |
|
"epoch": 0.09247607274093901, |
|
"learning_rate": 8.83838383838384e-05, |
|
"loss": 1.985875, |
|
"step": 125000 |
|
}, |
|
{ |
|
"epoch": 0.09321588132286653, |
|
"learning_rate": 8.828282828282829e-05, |
|
"loss": 2.0315, |
|
"step": 126000 |
|
}, |
|
{ |
|
"epoch": 0.09395568990479404, |
|
"learning_rate": 8.818181818181818e-05, |
|
"loss": 2.061625, |
|
"step": 127000 |
|
}, |
|
{ |
|
"epoch": 0.09469549848672154, |
|
"learning_rate": 8.808080808080809e-05, |
|
"loss": 2.03778125, |
|
"step": 128000 |
|
}, |
|
{ |
|
"epoch": 0.09543530706864906, |
|
"learning_rate": 8.797979797979798e-05, |
|
"loss": 2.0075625, |
|
"step": 129000 |
|
}, |
|
{ |
|
"epoch": 0.09617511565057657, |
|
"learning_rate": 8.787878787878789e-05, |
|
"loss": 1.9984375, |
|
"step": 130000 |
|
}, |
|
{ |
|
"epoch": 0.09691492423250408, |
|
"learning_rate": 8.777777777777778e-05, |
|
"loss": 1.99621875, |
|
"step": 131000 |
|
}, |
|
{ |
|
"epoch": 0.0976547328144316, |
|
"learning_rate": 8.767676767676767e-05, |
|
"loss": 2.01059375, |
|
"step": 132000 |
|
}, |
|
{ |
|
"epoch": 0.0983945413963591, |
|
"learning_rate": 8.757575757575758e-05, |
|
"loss": 2.0471875, |
|
"step": 133000 |
|
}, |
|
{ |
|
"epoch": 0.09913434997828661, |
|
"learning_rate": 8.747474747474747e-05, |
|
"loss": 2.01190625, |
|
"step": 134000 |
|
}, |
|
{ |
|
"epoch": 0.09987415856021413, |
|
"learning_rate": 8.737373737373738e-05, |
|
"loss": 2.00653125, |
|
"step": 135000 |
|
}, |
|
{ |
|
"epoch": 0.10061396714214164, |
|
"learning_rate": 8.727272727272727e-05, |
|
"loss": 2.014375, |
|
"step": 136000 |
|
}, |
|
{ |
|
"epoch": 0.10135377572406916, |
|
"learning_rate": 8.717171717171718e-05, |
|
"loss": 2.00065625, |
|
"step": 137000 |
|
}, |
|
{ |
|
"epoch": 0.10209358430599666, |
|
"learning_rate": 8.707070707070707e-05, |
|
"loss": 2.031875, |
|
"step": 138000 |
|
}, |
|
{ |
|
"epoch": 0.10283339288792417, |
|
"learning_rate": 8.696969696969698e-05, |
|
"loss": 1.99303125, |
|
"step": 139000 |
|
}, |
|
{ |
|
"epoch": 0.10357320146985169, |
|
"learning_rate": 8.686868686868688e-05, |
|
"loss": 1.97703125, |
|
"step": 140000 |
|
}, |
|
{ |
|
"epoch": 0.1043130100517792, |
|
"learning_rate": 8.676767676767678e-05, |
|
"loss": 1.99440625, |
|
"step": 141000 |
|
}, |
|
{ |
|
"epoch": 0.10505281863370672, |
|
"learning_rate": 8.666666666666667e-05, |
|
"loss": 2.00790625, |
|
"step": 142000 |
|
}, |
|
{ |
|
"epoch": 0.10579262721563423, |
|
"learning_rate": 8.656565656565657e-05, |
|
"loss": 1.9769375, |
|
"step": 143000 |
|
}, |
|
{ |
|
"epoch": 0.10653243579756173, |
|
"learning_rate": 8.646464646464647e-05, |
|
"loss": 1.98775, |
|
"step": 144000 |
|
}, |
|
{ |
|
"epoch": 0.10727224437948925, |
|
"learning_rate": 8.636363636363637e-05, |
|
"loss": 1.970875, |
|
"step": 145000 |
|
}, |
|
{ |
|
"epoch": 0.10801205296141676, |
|
"learning_rate": 8.626262626262627e-05, |
|
"loss": 1.97203125, |
|
"step": 146000 |
|
}, |
|
{ |
|
"epoch": 0.10875186154334428, |
|
"learning_rate": 8.616161616161616e-05, |
|
"loss": 1.97359375, |
|
"step": 147000 |
|
}, |
|
{ |
|
"epoch": 0.10949167012527179, |
|
"learning_rate": 8.606060606060606e-05, |
|
"loss": 1.97471875, |
|
"step": 148000 |
|
}, |
|
{ |
|
"epoch": 0.11023147870719929, |
|
"learning_rate": 8.595959595959596e-05, |
|
"loss": 1.96978125, |
|
"step": 149000 |
|
}, |
|
{ |
|
"epoch": 0.11097128728912681, |
|
"learning_rate": 8.585858585858586e-05, |
|
"loss": 1.9813125, |
|
"step": 150000 |
|
}, |
|
{ |
|
"epoch": 0.11171109587105432, |
|
"learning_rate": 8.575757575757576e-05, |
|
"loss": 1.9761875, |
|
"step": 151000 |
|
}, |
|
{ |
|
"epoch": 0.11245090445298184, |
|
"learning_rate": 8.565656565656566e-05, |
|
"loss": 1.9814375, |
|
"step": 152000 |
|
}, |
|
{ |
|
"epoch": 0.11319071303490935, |
|
"learning_rate": 8.555555555555556e-05, |
|
"loss": 1.97459375, |
|
"step": 153000 |
|
}, |
|
{ |
|
"epoch": 0.11393052161683687, |
|
"learning_rate": 8.545454545454545e-05, |
|
"loss": 1.97503125, |
|
"step": 154000 |
|
}, |
|
{ |
|
"epoch": 0.11467033019876437, |
|
"learning_rate": 8.535353535353535e-05, |
|
"loss": 1.98446875, |
|
"step": 155000 |
|
}, |
|
{ |
|
"epoch": 0.11541013878069188, |
|
"learning_rate": 8.525252525252526e-05, |
|
"loss": 1.997, |
|
"step": 156000 |
|
}, |
|
{ |
|
"epoch": 0.1161499473626194, |
|
"learning_rate": 8.515151515151515e-05, |
|
"loss": 1.98559375, |
|
"step": 157000 |
|
}, |
|
{ |
|
"epoch": 0.11688975594454691, |
|
"learning_rate": 8.505050505050506e-05, |
|
"loss": 1.99725, |
|
"step": 158000 |
|
}, |
|
{ |
|
"epoch": 0.11762956452647443, |
|
"learning_rate": 8.494949494949495e-05, |
|
"loss": 1.997375, |
|
"step": 159000 |
|
}, |
|
{ |
|
"epoch": 0.11836937310840193, |
|
"learning_rate": 8.484848484848486e-05, |
|
"loss": 1.98828125, |
|
"step": 160000 |
|
}, |
|
{ |
|
"epoch": 0.11910918169032944, |
|
"learning_rate": 8.474747474747475e-05, |
|
"loss": 1.9948125, |
|
"step": 161000 |
|
}, |
|
{ |
|
"epoch": 0.11984899027225696, |
|
"learning_rate": 8.464646464646466e-05, |
|
"loss": 1.986, |
|
"step": 162000 |
|
}, |
|
{ |
|
"epoch": 0.12058879885418447, |
|
"learning_rate": 8.454545454545455e-05, |
|
"loss": 1.99940625, |
|
"step": 163000 |
|
}, |
|
{ |
|
"epoch": 0.12132860743611198, |
|
"learning_rate": 8.444444444444444e-05, |
|
"loss": 1.99140625, |
|
"step": 164000 |
|
}, |
|
{ |
|
"epoch": 0.12206841601803949, |
|
"learning_rate": 8.434343434343435e-05, |
|
"loss": 1.996375, |
|
"step": 165000 |
|
}, |
|
{ |
|
"epoch": 0.122808224599967, |
|
"learning_rate": 8.424242424242424e-05, |
|
"loss": 1.99515625, |
|
"step": 166000 |
|
}, |
|
{ |
|
"epoch": 0.12354803318189451, |
|
"learning_rate": 8.414141414141415e-05, |
|
"loss": 1.97484375, |
|
"step": 167000 |
|
}, |
|
{ |
|
"epoch": 0.12428784176382203, |
|
"learning_rate": 8.404040404040404e-05, |
|
"loss": 1.99240625, |
|
"step": 168000 |
|
}, |
|
{ |
|
"epoch": 0.12502765034574953, |
|
"learning_rate": 8.393939393939393e-05, |
|
"loss": 1.99184375, |
|
"step": 169000 |
|
}, |
|
{ |
|
"epoch": 0.12576745892767704, |
|
"learning_rate": 8.383838383838384e-05, |
|
"loss": 1.99071875, |
|
"step": 170000 |
|
}, |
|
{ |
|
"epoch": 0.12650726750960456, |
|
"learning_rate": 8.373737373737373e-05, |
|
"loss": 1.99640625, |
|
"step": 171000 |
|
}, |
|
{ |
|
"epoch": 0.12724707609153207, |
|
"learning_rate": 8.363636363636364e-05, |
|
"loss": 2.009375, |
|
"step": 172000 |
|
}, |
|
{ |
|
"epoch": 0.1279868846734596, |
|
"learning_rate": 8.353535353535355e-05, |
|
"loss": 2.01953125, |
|
"step": 173000 |
|
}, |
|
{ |
|
"epoch": 0.1287266932553871, |
|
"learning_rate": 8.343434343434344e-05, |
|
"loss": 2.0111875, |
|
"step": 174000 |
|
}, |
|
{ |
|
"epoch": 0.12946650183731462, |
|
"learning_rate": 8.333333333333334e-05, |
|
"loss": 2.014, |
|
"step": 175000 |
|
}, |
|
{ |
|
"epoch": 0.13020631041924213, |
|
"learning_rate": 8.323232323232324e-05, |
|
"loss": 2.0209375, |
|
"step": 176000 |
|
}, |
|
{ |
|
"epoch": 0.13094611900116965, |
|
"learning_rate": 8.313131313131314e-05, |
|
"loss": 1.98971875, |
|
"step": 177000 |
|
}, |
|
{ |
|
"epoch": 0.13168592758309716, |
|
"learning_rate": 8.303030303030304e-05, |
|
"loss": 1.978, |
|
"step": 178000 |
|
}, |
|
{ |
|
"epoch": 0.13242573616502465, |
|
"learning_rate": 8.292929292929293e-05, |
|
"loss": 2.0005, |
|
"step": 179000 |
|
}, |
|
{ |
|
"epoch": 0.13316554474695216, |
|
"learning_rate": 8.282828282828283e-05, |
|
"loss": 2.01215625, |
|
"step": 180000 |
|
}, |
|
{ |
|
"epoch": 0.13390535332887968, |
|
"learning_rate": 8.272727272727273e-05, |
|
"loss": 2.0359375, |
|
"step": 181000 |
|
}, |
|
{ |
|
"epoch": 0.1346451619108072, |
|
"learning_rate": 8.262626262626263e-05, |
|
"loss": 2.0521875, |
|
"step": 182000 |
|
}, |
|
{ |
|
"epoch": 0.1353849704927347, |
|
"learning_rate": 8.252525252525253e-05, |
|
"loss": 2.0225625, |
|
"step": 183000 |
|
}, |
|
{ |
|
"epoch": 0.13612477907466222, |
|
"learning_rate": 8.242424242424243e-05, |
|
"loss": 2.02978125, |
|
"step": 184000 |
|
}, |
|
{ |
|
"epoch": 0.13686458765658974, |
|
"learning_rate": 8.232323232323233e-05, |
|
"loss": 2.09690625, |
|
"step": 185000 |
|
}, |
|
{ |
|
"epoch": 0.13760439623851725, |
|
"learning_rate": 8.222222222222222e-05, |
|
"loss": 2.14709375, |
|
"step": 186000 |
|
}, |
|
{ |
|
"epoch": 0.13834420482044477, |
|
"learning_rate": 8.212121212121212e-05, |
|
"loss": 2.05809375, |
|
"step": 187000 |
|
}, |
|
{ |
|
"epoch": 0.13908401340237228, |
|
"learning_rate": 8.202020202020202e-05, |
|
"loss": 2.24625, |
|
"step": 188000 |
|
}, |
|
{ |
|
"epoch": 0.1398238219842998, |
|
"learning_rate": 8.191919191919192e-05, |
|
"loss": 2.2840625, |
|
"step": 189000 |
|
}, |
|
{ |
|
"epoch": 0.14056363056622728, |
|
"learning_rate": 8.181818181818183e-05, |
|
"loss": 2.23384375, |
|
"step": 190000 |
|
}, |
|
{ |
|
"epoch": 0.1413034391481548, |
|
"learning_rate": 8.171717171717172e-05, |
|
"loss": 2.27740625, |
|
"step": 191000 |
|
}, |
|
{ |
|
"epoch": 0.1420432477300823, |
|
"learning_rate": 8.161616161616163e-05, |
|
"loss": 2.3976875, |
|
"step": 192000 |
|
}, |
|
{ |
|
"epoch": 0.14278305631200983, |
|
"learning_rate": 8.151515151515152e-05, |
|
"loss": 2.22225, |
|
"step": 193000 |
|
}, |
|
{ |
|
"epoch": 0.14352286489393734, |
|
"learning_rate": 8.141414141414141e-05, |
|
"loss": 2.28578125, |
|
"step": 194000 |
|
}, |
|
{ |
|
"epoch": 0.14426267347586486, |
|
"learning_rate": 8.131313131313132e-05, |
|
"loss": 2.22359375, |
|
"step": 195000 |
|
}, |
|
{ |
|
"epoch": 0.14500248205779237, |
|
"learning_rate": 8.121212121212121e-05, |
|
"loss": 2.16703125, |
|
"step": 196000 |
|
}, |
|
{ |
|
"epoch": 0.14574229063971988, |
|
"learning_rate": 8.111111111111112e-05, |
|
"loss": 2.26603125, |
|
"step": 197000 |
|
}, |
|
{ |
|
"epoch": 0.1464820992216474, |
|
"learning_rate": 8.101010101010101e-05, |
|
"loss": 2.159125, |
|
"step": 198000 |
|
}, |
|
{ |
|
"epoch": 0.14722190780357491, |
|
"learning_rate": 8.090909090909092e-05, |
|
"loss": 2.02203125, |
|
"step": 199000 |
|
}, |
|
{ |
|
"epoch": 0.14796171638550243, |
|
"learning_rate": 8.080808080808081e-05, |
|
"loss": 2.01090625, |
|
"step": 200000 |
|
}, |
|
{ |
|
"epoch": 0.14870152496742992, |
|
"learning_rate": 8.07070707070707e-05, |
|
"loss": 2.0288125, |
|
"step": 201000 |
|
}, |
|
{ |
|
"epoch": 0.14944133354935743, |
|
"learning_rate": 8.060606060606061e-05, |
|
"loss": 2.00403125, |
|
"step": 202000 |
|
}, |
|
{ |
|
"epoch": 0.15018114213128494, |
|
"learning_rate": 8.05050505050505e-05, |
|
"loss": 1.99765625, |
|
"step": 203000 |
|
}, |
|
{ |
|
"epoch": 0.15092095071321246, |
|
"learning_rate": 8.040404040404041e-05, |
|
"loss": 1.98784375, |
|
"step": 204000 |
|
} |
|
], |
|
"max_steps": 1000000, |
|
"num_train_epochs": 1, |
|
"total_flos": 369792323308800000 |
|
} |
|
|