{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.15092095071321246, "global_step": 204000, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.000739808581927512, "learning_rate": 1e-05, "loss": 9.4473583984375, "step": 1000 }, { "epoch": 0.001479617163855024, "learning_rate": 2e-05, "loss": 4.761673828125, "step": 2000 }, { "epoch": 0.002219425745782536, "learning_rate": 3e-05, "loss": 3.3295283203125, "step": 3000 }, { "epoch": 0.002959234327710048, "learning_rate": 4e-05, "loss": 2.793072265625, "step": 4000 }, { "epoch": 0.00369904290963756, "learning_rate": 5e-05, "loss": 2.51521875, "step": 5000 }, { "epoch": 0.004438851491565072, "learning_rate": 6e-05, "loss": 2.3833671875, "step": 6000 }, { "epoch": 0.005178660073492584, "learning_rate": 7e-05, "loss": 2.28459765625, "step": 7000 }, { "epoch": 0.005918468655420096, "learning_rate": 8e-05, "loss": 2.228125, "step": 8000 }, { "epoch": 0.006658277237347608, "learning_rate": 9e-05, "loss": 2.19816015625, "step": 9000 }, { "epoch": 0.00739808581927512, "learning_rate": 0.0001, "loss": 2.15448828125, "step": 10000 }, { "epoch": 0.008137894401202633, "learning_rate": 9.98989898989899e-05, "loss": 2.13649609375, "step": 11000 }, { "epoch": 0.008877702983130144, "learning_rate": 9.97979797979798e-05, "loss": 2.08796484375, "step": 12000 }, { "epoch": 0.009617511565057657, "learning_rate": 9.96969696969697e-05, "loss": 2.0968984375, "step": 13000 }, { "epoch": 0.010357320146985169, "learning_rate": 9.95959595959596e-05, "loss": 2.08955078125, "step": 14000 }, { "epoch": 0.011097128728912681, "learning_rate": 9.94949494949495e-05, "loss": 2.0763828125, "step": 15000 }, { "epoch": 0.011836937310840193, "learning_rate": 9.939393939393939e-05, "loss": 2.06798828125, "step": 16000 }, { "epoch": 0.012576745892767706, "learning_rate": 9.92929292929293e-05, "loss": 2.06260546875, "step": 17000 }, { "epoch": 0.013316554474695217, "learning_rate": 9.919191919191919e-05, "loss": 2.058328125, "step": 18000 }, { "epoch": 0.01405636305662273, "learning_rate": 9.909090909090911e-05, "loss": 2.04747265625, "step": 19000 }, { "epoch": 0.01479617163855024, "learning_rate": 9.8989898989899e-05, "loss": 2.0418671875, "step": 20000 }, { "epoch": 0.015535980220477754, "learning_rate": 9.888888888888889e-05, "loss": 2.0486484375, "step": 21000 }, { "epoch": 0.016275788802405267, "learning_rate": 9.87878787878788e-05, "loss": 2.052984375, "step": 22000 }, { "epoch": 0.017015597384332778, "learning_rate": 9.868686868686869e-05, "loss": 2.0374609375, "step": 23000 }, { "epoch": 0.01775540596626029, "learning_rate": 9.85858585858586e-05, "loss": 2.03405078125, "step": 24000 }, { "epoch": 0.018495214548187804, "learning_rate": 9.848484848484849e-05, "loss": 2.04113671875, "step": 25000 }, { "epoch": 0.019235023130115315, "learning_rate": 9.838383838383838e-05, "loss": 2.02526953125, "step": 26000 }, { "epoch": 0.019974831712042826, "learning_rate": 9.828282828282829e-05, "loss": 2.0195234375, "step": 27000 }, { "epoch": 0.020714640293970337, "learning_rate": 9.818181818181818e-05, "loss": 2.0342734375, "step": 28000 }, { "epoch": 0.021454448875897852, "learning_rate": 9.808080808080809e-05, "loss": 2.0206328125, "step": 29000 }, { "epoch": 0.022194257457825363, "learning_rate": 9.797979797979798e-05, "loss": 2.0215859375, "step": 30000 }, { "epoch": 0.022934066039752874, "learning_rate": 9.787878787878789e-05, "loss": 2.0064453125, "step": 31000 }, { "epoch": 0.023673874621680385, "learning_rate": 9.777777777777778e-05, "loss": 2.0098984375, "step": 32000 }, { "epoch": 0.0244136832036079, "learning_rate": 9.767676767676767e-05, "loss": 2.0178046875, "step": 33000 }, { "epoch": 0.02515349178553541, "learning_rate": 9.757575757575758e-05, "loss": 2.01334375, "step": 34000 }, { "epoch": 0.025893300367462922, "learning_rate": 9.747474747474747e-05, "loss": 2.01625, "step": 35000 }, { "epoch": 0.026633108949390433, "learning_rate": 9.737373737373738e-05, "loss": 2.0108828125, "step": 36000 }, { "epoch": 0.027372917531317948, "learning_rate": 9.727272727272728e-05, "loss": 2.018734375, "step": 37000 }, { "epoch": 0.02811272611324546, "learning_rate": 9.717171717171718e-05, "loss": 2.003578125, "step": 38000 }, { "epoch": 0.02885253469517297, "learning_rate": 9.707070707070708e-05, "loss": 2.01503125, "step": 39000 }, { "epoch": 0.02959234327710048, "learning_rate": 9.696969696969698e-05, "loss": 2.016765625, "step": 40000 }, { "epoch": 0.030332151859027996, "learning_rate": 9.686868686868688e-05, "loss": 2.0055703125, "step": 41000 }, { "epoch": 0.031071960440955507, "learning_rate": 9.676767676767677e-05, "loss": 2.015375, "step": 42000 }, { "epoch": 0.03181176902288302, "learning_rate": 9.666666666666667e-05, "loss": 2.0087421875, "step": 43000 }, { "epoch": 0.03255157760481053, "learning_rate": 9.656565656565657e-05, "loss": 2.016359375, "step": 44000 }, { "epoch": 0.03329138618673804, "learning_rate": 9.646464646464647e-05, "loss": 2.015015625, "step": 45000 }, { "epoch": 0.034031194768665556, "learning_rate": 9.636363636363637e-05, "loss": 2.025421875, "step": 46000 }, { "epoch": 0.03477100335059307, "learning_rate": 9.626262626262627e-05, "loss": 2.0208671875, "step": 47000 }, { "epoch": 0.03551081193252058, "learning_rate": 9.616161616161616e-05, "loss": 2.00615625, "step": 48000 }, { "epoch": 0.03625062051444809, "learning_rate": 9.606060606060606e-05, "loss": 2.00828125, "step": 49000 }, { "epoch": 0.03699042909637561, "learning_rate": 9.595959595959596e-05, "loss": 1.998234375, "step": 50000 }, { "epoch": 0.037730237678303115, "learning_rate": 9.585858585858586e-05, "loss": 2.0086640625, "step": 51000 }, { "epoch": 0.03847004626023063, "learning_rate": 9.575757575757576e-05, "loss": 1.9955234375, "step": 52000 }, { "epoch": 0.03920985484215814, "learning_rate": 9.565656565656566e-05, "loss": 2.0098984375, "step": 53000 }, { "epoch": 0.03994966342408565, "learning_rate": 9.555555555555557e-05, "loss": 1.9921015625, "step": 54000 }, { "epoch": 0.040689472006013166, "learning_rate": 9.545454545454546e-05, "loss": 2.0024765625, "step": 55000 }, { "epoch": 0.041429280587940674, "learning_rate": 9.535353535353537e-05, "loss": 2.0018515625, "step": 56000 }, { "epoch": 0.04216908916986819, "learning_rate": 9.525252525252526e-05, "loss": 2.009984375, "step": 57000 }, { "epoch": 0.042908897751795703, "learning_rate": 9.515151515151515e-05, "loss": 2.00109375, "step": 58000 }, { "epoch": 0.04364870633372321, "learning_rate": 9.505050505050506e-05, "loss": 2.017796875, "step": 59000 }, { "epoch": 0.044388514915650726, "learning_rate": 9.494949494949495e-05, "loss": 2.005921875, "step": 60000 }, { "epoch": 0.045128323497578234, "learning_rate": 9.484848484848486e-05, "loss": 2.012265625, "step": 61000 }, { "epoch": 0.04586813207950575, "learning_rate": 9.474747474747475e-05, "loss": 1.99825, "step": 62000 }, { "epoch": 0.04660794066143326, "learning_rate": 9.464646464646464e-05, "loss": 2.003953125, "step": 63000 }, { "epoch": 0.04734774924336077, "learning_rate": 9.454545454545455e-05, "loss": 2.019515625, "step": 64000 }, { "epoch": 0.048087557825288285, "learning_rate": 9.444444444444444e-05, "loss": 2.00675, "step": 65000 }, { "epoch": 0.0488273664072158, "learning_rate": 9.434343434343435e-05, "loss": 2.005234375, "step": 66000 }, { "epoch": 0.04956717498914331, "learning_rate": 9.424242424242424e-05, "loss": 1.9940625, "step": 67000 }, { "epoch": 0.05030698357107082, "learning_rate": 9.414141414141415e-05, "loss": 1.999109375, "step": 68000 }, { "epoch": 0.05104679215299833, "learning_rate": 9.404040404040404e-05, "loss": 2.00134375, "step": 69000 }, { "epoch": 0.051786600734925844, "learning_rate": 9.393939393939395e-05, "loss": 2.007953125, "step": 70000 }, { "epoch": 0.05252640931685336, "learning_rate": 9.383838383838385e-05, "loss": 2.005171875, "step": 71000 }, { "epoch": 0.05326621789878087, "learning_rate": 9.373737373737375e-05, "loss": 2.000734375, "step": 72000 }, { "epoch": 0.05400602648070838, "learning_rate": 9.363636363636364e-05, "loss": 2.01009375, "step": 73000 }, { "epoch": 0.054745835062635896, "learning_rate": 9.353535353535354e-05, "loss": 2.006703125, "step": 74000 }, { "epoch": 0.055485643644563404, "learning_rate": 9.343434343434344e-05, "loss": 2.019640625, "step": 75000 }, { "epoch": 0.05622545222649092, "learning_rate": 9.333333333333334e-05, "loss": 1.999078125, "step": 76000 }, { "epoch": 0.05696526080841843, "learning_rate": 9.323232323232324e-05, "loss": 1.995890625, "step": 77000 }, { "epoch": 0.05770506939034594, "learning_rate": 9.313131313131314e-05, "loss": 2.00525, "step": 78000 }, { "epoch": 0.058444877972273455, "learning_rate": 9.303030303030303e-05, "loss": 2.0128125, "step": 79000 }, { "epoch": 0.05918468655420096, "learning_rate": 9.292929292929293e-05, "loss": 2.006140625, "step": 80000 }, { "epoch": 0.05992449513612848, "learning_rate": 9.282828282828283e-05, "loss": 2.0091875, "step": 81000 }, { "epoch": 0.06066430371805599, "learning_rate": 9.272727272727273e-05, "loss": 1.997203125, "step": 82000 }, { "epoch": 0.0614041122999835, "learning_rate": 9.262626262626263e-05, "loss": 2.020015625, "step": 83000 }, { "epoch": 0.062143920881911015, "learning_rate": 9.252525252525253e-05, "loss": 2.028375, "step": 84000 }, { "epoch": 0.06288372946383852, "learning_rate": 9.242424242424242e-05, "loss": 2.006765625, "step": 85000 }, { "epoch": 0.06362353804576604, "learning_rate": 9.232323232323232e-05, "loss": 2.016703125, "step": 86000 }, { "epoch": 0.06436334662769355, "learning_rate": 9.222222222222223e-05, "loss": 2.041, "step": 87000 }, { "epoch": 0.06510315520962107, "learning_rate": 9.212121212121214e-05, "loss": 2.029640625, "step": 88000 }, { "epoch": 0.06584296379154858, "learning_rate": 9.202020202020203e-05, "loss": 2.005890625, "step": 89000 }, { "epoch": 0.06658277237347608, "learning_rate": 9.191919191919192e-05, "loss": 1.997921875, "step": 90000 }, { "epoch": 0.0673225809554036, "learning_rate": 9.181818181818183e-05, "loss": 1.993796875, "step": 91000 }, { "epoch": 0.06806238953733111, "learning_rate": 9.171717171717172e-05, "loss": 1.997109375, "step": 92000 }, { "epoch": 0.06880219811925863, "learning_rate": 9.161616161616163e-05, "loss": 2.004078125, "step": 93000 }, { "epoch": 0.06954200670118614, "learning_rate": 9.151515151515152e-05, "loss": 2.001828125, "step": 94000 }, { "epoch": 0.07028181528311364, "learning_rate": 9.141414141414141e-05, "loss": 1.9931875, "step": 95000 }, { "epoch": 0.07102162386504116, "learning_rate": 9.131313131313132e-05, "loss": 1.987171875, "step": 96000 }, { "epoch": 0.07176143244696867, "learning_rate": 9.121212121212121e-05, "loss": 2.0305, "step": 97000 }, { "epoch": 0.07250124102889619, "learning_rate": 9.111111111111112e-05, "loss": 1.999484375, "step": 98000 }, { "epoch": 0.0732410496108237, "learning_rate": 9.101010101010101e-05, "loss": 1.989171875, "step": 99000 }, { "epoch": 0.07398085819275121, "learning_rate": 9.090909090909092e-05, "loss": 1.988671875, "step": 100000 }, { "epoch": 0.07472066677467872, "learning_rate": 9.080808080808081e-05, "loss": 1.989, "step": 101000 }, { "epoch": 0.07546047535660623, "learning_rate": 9.07070707070707e-05, "loss": 1.99684375, "step": 102000 }, { "epoch": 0.07620028393853374, "learning_rate": 9.060606060606061e-05, "loss": 2.004921875, "step": 103000 }, { "epoch": 0.07694009252046126, "learning_rate": 9.050505050505052e-05, "loss": 2.01390625, "step": 104000 }, { "epoch": 0.07767990110238877, "learning_rate": 9.040404040404041e-05, "loss": 1.98865625, "step": 105000 }, { "epoch": 0.07841970968431627, "learning_rate": 9.030303030303031e-05, "loss": 1.98871875, "step": 106000 }, { "epoch": 0.07915951826624379, "learning_rate": 9.02020202020202e-05, "loss": 1.994046875, "step": 107000 }, { "epoch": 0.0798993268481713, "learning_rate": 9.010101010101011e-05, "loss": 2.010546875, "step": 108000 }, { "epoch": 0.08063913543009882, "learning_rate": 9e-05, "loss": 2.005640625, "step": 109000 }, { "epoch": 0.08137894401202633, "learning_rate": 8.98989898989899e-05, "loss": 1.99709375, "step": 110000 }, { "epoch": 0.08211875259395383, "learning_rate": 8.97979797979798e-05, "loss": 2.01225, "step": 111000 }, { "epoch": 0.08285856117588135, "learning_rate": 8.96969696969697e-05, "loss": 1.982421875, "step": 112000 }, { "epoch": 0.08359836975780886, "learning_rate": 8.95959595959596e-05, "loss": 1.988484375, "step": 113000 }, { "epoch": 0.08433817833973638, "learning_rate": 8.94949494949495e-05, "loss": 2.001359375, "step": 114000 }, { "epoch": 0.08507798692166389, "learning_rate": 8.93939393939394e-05, "loss": 2.001796875, "step": 115000 }, { "epoch": 0.08581779550359141, "learning_rate": 8.92929292929293e-05, "loss": 2.012875, "step": 116000 }, { "epoch": 0.08655760408551891, "learning_rate": 8.919191919191919e-05, "loss": 2.007953125, "step": 117000 }, { "epoch": 0.08729741266744642, "learning_rate": 8.90909090909091e-05, "loss": 2.00078125, "step": 118000 }, { "epoch": 0.08803722124937394, "learning_rate": 8.898989898989899e-05, "loss": 2.02653125, "step": 119000 }, { "epoch": 0.08877702983130145, "learning_rate": 8.888888888888889e-05, "loss": 2.006796875, "step": 120000 }, { "epoch": 0.08951683841322897, "learning_rate": 8.87878787878788e-05, "loss": 2.007296875, "step": 121000 }, { "epoch": 0.09025664699515647, "learning_rate": 8.868686868686869e-05, "loss": 2.012890625, "step": 122000 }, { "epoch": 0.09099645557708398, "learning_rate": 8.85858585858586e-05, "loss": 2.012546875, "step": 123000 }, { "epoch": 0.0917362641590115, "learning_rate": 8.848484848484849e-05, "loss": 2.016765625, "step": 124000 }, { "epoch": 0.09247607274093901, "learning_rate": 8.83838383838384e-05, "loss": 1.985875, "step": 125000 }, { "epoch": 0.09321588132286653, "learning_rate": 8.828282828282829e-05, "loss": 2.0315, "step": 126000 }, { "epoch": 0.09395568990479404, "learning_rate": 8.818181818181818e-05, "loss": 2.061625, "step": 127000 }, { "epoch": 0.09469549848672154, "learning_rate": 8.808080808080809e-05, "loss": 2.03778125, "step": 128000 }, { "epoch": 0.09543530706864906, "learning_rate": 8.797979797979798e-05, "loss": 2.0075625, "step": 129000 }, { "epoch": 0.09617511565057657, "learning_rate": 8.787878787878789e-05, "loss": 1.9984375, "step": 130000 }, { "epoch": 0.09691492423250408, "learning_rate": 8.777777777777778e-05, "loss": 1.99621875, "step": 131000 }, { "epoch": 0.0976547328144316, "learning_rate": 8.767676767676767e-05, "loss": 2.01059375, "step": 132000 }, { "epoch": 0.0983945413963591, "learning_rate": 8.757575757575758e-05, "loss": 2.0471875, "step": 133000 }, { "epoch": 0.09913434997828661, "learning_rate": 8.747474747474747e-05, "loss": 2.01190625, "step": 134000 }, { "epoch": 0.09987415856021413, "learning_rate": 8.737373737373738e-05, "loss": 2.00653125, "step": 135000 }, { "epoch": 0.10061396714214164, "learning_rate": 8.727272727272727e-05, "loss": 2.014375, "step": 136000 }, { "epoch": 0.10135377572406916, "learning_rate": 8.717171717171718e-05, "loss": 2.00065625, "step": 137000 }, { "epoch": 0.10209358430599666, "learning_rate": 8.707070707070707e-05, "loss": 2.031875, "step": 138000 }, { "epoch": 0.10283339288792417, "learning_rate": 8.696969696969698e-05, "loss": 1.99303125, "step": 139000 }, { "epoch": 0.10357320146985169, "learning_rate": 8.686868686868688e-05, "loss": 1.97703125, "step": 140000 }, { "epoch": 0.1043130100517792, "learning_rate": 8.676767676767678e-05, "loss": 1.99440625, "step": 141000 }, { "epoch": 0.10505281863370672, "learning_rate": 8.666666666666667e-05, "loss": 2.00790625, "step": 142000 }, { "epoch": 0.10579262721563423, "learning_rate": 8.656565656565657e-05, "loss": 1.9769375, "step": 143000 }, { "epoch": 0.10653243579756173, "learning_rate": 8.646464646464647e-05, "loss": 1.98775, "step": 144000 }, { "epoch": 0.10727224437948925, "learning_rate": 8.636363636363637e-05, "loss": 1.970875, "step": 145000 }, { "epoch": 0.10801205296141676, "learning_rate": 8.626262626262627e-05, "loss": 1.97203125, "step": 146000 }, { "epoch": 0.10875186154334428, "learning_rate": 8.616161616161616e-05, "loss": 1.97359375, "step": 147000 }, { "epoch": 0.10949167012527179, "learning_rate": 8.606060606060606e-05, "loss": 1.97471875, "step": 148000 }, { "epoch": 0.11023147870719929, "learning_rate": 8.595959595959596e-05, "loss": 1.96978125, "step": 149000 }, { "epoch": 0.11097128728912681, "learning_rate": 8.585858585858586e-05, "loss": 1.9813125, "step": 150000 }, { "epoch": 0.11171109587105432, "learning_rate": 8.575757575757576e-05, "loss": 1.9761875, "step": 151000 }, { "epoch": 0.11245090445298184, "learning_rate": 8.565656565656566e-05, "loss": 1.9814375, "step": 152000 }, { "epoch": 0.11319071303490935, "learning_rate": 8.555555555555556e-05, "loss": 1.97459375, "step": 153000 }, { "epoch": 0.11393052161683687, "learning_rate": 8.545454545454545e-05, "loss": 1.97503125, "step": 154000 }, { "epoch": 0.11467033019876437, "learning_rate": 8.535353535353535e-05, "loss": 1.98446875, "step": 155000 }, { "epoch": 0.11541013878069188, "learning_rate": 8.525252525252526e-05, "loss": 1.997, "step": 156000 }, { "epoch": 0.1161499473626194, "learning_rate": 8.515151515151515e-05, "loss": 1.98559375, "step": 157000 }, { "epoch": 0.11688975594454691, "learning_rate": 8.505050505050506e-05, "loss": 1.99725, "step": 158000 }, { "epoch": 0.11762956452647443, "learning_rate": 8.494949494949495e-05, "loss": 1.997375, "step": 159000 }, { "epoch": 0.11836937310840193, "learning_rate": 8.484848484848486e-05, "loss": 1.98828125, "step": 160000 }, { "epoch": 0.11910918169032944, "learning_rate": 8.474747474747475e-05, "loss": 1.9948125, "step": 161000 }, { "epoch": 0.11984899027225696, "learning_rate": 8.464646464646466e-05, "loss": 1.986, "step": 162000 }, { "epoch": 0.12058879885418447, "learning_rate": 8.454545454545455e-05, "loss": 1.99940625, "step": 163000 }, { "epoch": 0.12132860743611198, "learning_rate": 8.444444444444444e-05, "loss": 1.99140625, "step": 164000 }, { "epoch": 0.12206841601803949, "learning_rate": 8.434343434343435e-05, "loss": 1.996375, "step": 165000 }, { "epoch": 0.122808224599967, "learning_rate": 8.424242424242424e-05, "loss": 1.99515625, "step": 166000 }, { "epoch": 0.12354803318189451, "learning_rate": 8.414141414141415e-05, "loss": 1.97484375, "step": 167000 }, { "epoch": 0.12428784176382203, "learning_rate": 8.404040404040404e-05, "loss": 1.99240625, "step": 168000 }, { "epoch": 0.12502765034574953, "learning_rate": 8.393939393939393e-05, "loss": 1.99184375, "step": 169000 }, { "epoch": 0.12576745892767704, "learning_rate": 8.383838383838384e-05, "loss": 1.99071875, "step": 170000 }, { "epoch": 0.12650726750960456, "learning_rate": 8.373737373737373e-05, "loss": 1.99640625, "step": 171000 }, { "epoch": 0.12724707609153207, "learning_rate": 8.363636363636364e-05, "loss": 2.009375, "step": 172000 }, { "epoch": 0.1279868846734596, "learning_rate": 8.353535353535355e-05, "loss": 2.01953125, "step": 173000 }, { "epoch": 0.1287266932553871, "learning_rate": 8.343434343434344e-05, "loss": 2.0111875, "step": 174000 }, { "epoch": 0.12946650183731462, "learning_rate": 8.333333333333334e-05, "loss": 2.014, "step": 175000 }, { "epoch": 0.13020631041924213, "learning_rate": 8.323232323232324e-05, "loss": 2.0209375, "step": 176000 }, { "epoch": 0.13094611900116965, "learning_rate": 8.313131313131314e-05, "loss": 1.98971875, "step": 177000 }, { "epoch": 0.13168592758309716, "learning_rate": 8.303030303030304e-05, "loss": 1.978, "step": 178000 }, { "epoch": 0.13242573616502465, "learning_rate": 8.292929292929293e-05, "loss": 2.0005, "step": 179000 }, { "epoch": 0.13316554474695216, "learning_rate": 8.282828282828283e-05, "loss": 2.01215625, "step": 180000 }, { "epoch": 0.13390535332887968, "learning_rate": 8.272727272727273e-05, "loss": 2.0359375, "step": 181000 }, { "epoch": 0.1346451619108072, "learning_rate": 8.262626262626263e-05, "loss": 2.0521875, "step": 182000 }, { "epoch": 0.1353849704927347, "learning_rate": 8.252525252525253e-05, "loss": 2.0225625, "step": 183000 }, { "epoch": 0.13612477907466222, "learning_rate": 8.242424242424243e-05, "loss": 2.02978125, "step": 184000 }, { "epoch": 0.13686458765658974, "learning_rate": 8.232323232323233e-05, "loss": 2.09690625, "step": 185000 }, { "epoch": 0.13760439623851725, "learning_rate": 8.222222222222222e-05, "loss": 2.14709375, "step": 186000 }, { "epoch": 0.13834420482044477, "learning_rate": 8.212121212121212e-05, "loss": 2.05809375, "step": 187000 }, { "epoch": 0.13908401340237228, "learning_rate": 8.202020202020202e-05, "loss": 2.24625, "step": 188000 }, { "epoch": 0.1398238219842998, "learning_rate": 8.191919191919192e-05, "loss": 2.2840625, "step": 189000 }, { "epoch": 0.14056363056622728, "learning_rate": 8.181818181818183e-05, "loss": 2.23384375, "step": 190000 }, { "epoch": 0.1413034391481548, "learning_rate": 8.171717171717172e-05, "loss": 2.27740625, "step": 191000 }, { "epoch": 0.1420432477300823, "learning_rate": 8.161616161616163e-05, "loss": 2.3976875, "step": 192000 }, { "epoch": 0.14278305631200983, "learning_rate": 8.151515151515152e-05, "loss": 2.22225, "step": 193000 }, { "epoch": 0.14352286489393734, "learning_rate": 8.141414141414141e-05, "loss": 2.28578125, "step": 194000 }, { "epoch": 0.14426267347586486, "learning_rate": 8.131313131313132e-05, "loss": 2.22359375, "step": 195000 }, { "epoch": 0.14500248205779237, "learning_rate": 8.121212121212121e-05, "loss": 2.16703125, "step": 196000 }, { "epoch": 0.14574229063971988, "learning_rate": 8.111111111111112e-05, "loss": 2.26603125, "step": 197000 }, { "epoch": 0.1464820992216474, "learning_rate": 8.101010101010101e-05, "loss": 2.159125, "step": 198000 }, { "epoch": 0.14722190780357491, "learning_rate": 8.090909090909092e-05, "loss": 2.02203125, "step": 199000 }, { "epoch": 0.14796171638550243, "learning_rate": 8.080808080808081e-05, "loss": 2.01090625, "step": 200000 }, { "epoch": 0.14870152496742992, "learning_rate": 8.07070707070707e-05, "loss": 2.0288125, "step": 201000 }, { "epoch": 0.14944133354935743, "learning_rate": 8.060606060606061e-05, "loss": 2.00403125, "step": 202000 }, { "epoch": 0.15018114213128494, "learning_rate": 8.05050505050505e-05, "loss": 1.99765625, "step": 203000 }, { "epoch": 0.15092095071321246, "learning_rate": 8.040404040404041e-05, "loss": 1.98784375, "step": 204000 } ], "max_steps": 1000000, "num_train_epochs": 1, "total_flos": 369792323308800000 }