{ "best_metric": null, "best_model_checkpoint": null, "epoch": 27.796988998262883, "global_step": 120000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.12, "learning_rate": 8.669214732453091e-06, "loss": 49.8699, "step": 500 }, { "epoch": 0.23, "learning_rate": 1.7355802640722724e-05, "loss": 41.2735, "step": 1000 }, { "epoch": 0.35, "learning_rate": 2.6042390548992352e-05, "loss": 36.9489, "step": 1500 }, { "epoch": 0.46, "learning_rate": 3.472897845726199e-05, "loss": 33.9779, "step": 2000 }, { "epoch": 0.58, "learning_rate": 4.341556636553162e-05, "loss": 31.7843, "step": 2500 }, { "epoch": 0.69, "learning_rate": 5.210215427380125e-05, "loss": 30.3453, "step": 3000 }, { "epoch": 0.81, "learning_rate": 6.0788742182070875e-05, "loss": 29.3517, "step": 3500 }, { "epoch": 0.93, "learning_rate": 6.945795691452397e-05, "loss": 28.5657, "step": 4000 }, { "epoch": 1.04, "learning_rate": 7.81445448227936e-05, "loss": 27.9059, "step": 4500 }, { "epoch": 1.16, "learning_rate": 8.683113273106324e-05, "loss": 27.256, "step": 5000 }, { "epoch": 1.27, "learning_rate": 9.550034746351632e-05, "loss": 26.6368, "step": 5500 }, { "epoch": 1.39, "learning_rate": 0.00010418693537178596, "loss": 26.0616, "step": 6000 }, { "epoch": 1.51, "learning_rate": 0.00011287352328005559, "loss": 25.5779, "step": 6500 }, { "epoch": 1.62, "learning_rate": 0.00012154273801250867, "loss": 25.154, "step": 7000 }, { "epoch": 1.74, "learning_rate": 0.0001302293259207783, "loss": 24.757, "step": 7500 }, { "epoch": 1.85, "learning_rate": 0.00013891591382904795, "loss": 24.3901, "step": 8000 }, { "epoch": 1.97, "learning_rate": 0.0001476025017373176, "loss": 24.0289, "step": 8500 }, { "epoch": 2.08, "learning_rate": 0.0001562890896455872, "loss": 23.7116, "step": 9000 }, { "epoch": 2.2, "learning_rate": 0.0001649756775538568, "loss": 23.3895, "step": 9500 }, { "epoch": 2.32, "learning_rate": 0.00017366226546212648, "loss": 23.1314, "step": 10000 }, { "epoch": 2.43, "learning_rate": 0.00018234885337039609, "loss": 22.8528, "step": 10500 }, { "epoch": 2.55, "learning_rate": 0.00019103544127866572, "loss": 22.5796, "step": 11000 }, { "epoch": 2.66, "learning_rate": 0.00019972202918693536, "loss": 22.3478, "step": 11500 }, { "epoch": 2.78, "learning_rate": 0.000208408617095205, "loss": 22.1018, "step": 12000 }, { "epoch": 2.9, "learning_rate": 0.0002170952050034746, "loss": 21.8626, "step": 12500 }, { "epoch": 3.01, "learning_rate": 0.0002257644197359277, "loss": 21.6681, "step": 13000 }, { "epoch": 3.13, "learning_rate": 0.00023445100764419736, "loss": 21.4255, "step": 13500 }, { "epoch": 3.24, "learning_rate": 0.00024313759555246697, "loss": 21.2724, "step": 14000 }, { "epoch": 3.36, "learning_rate": 0.0002518241834607366, "loss": 21.0849, "step": 14500 }, { "epoch": 3.47, "learning_rate": 0.00026051077136900625, "loss": 20.8968, "step": 15000 }, { "epoch": 3.59, "learning_rate": 0.00026919735927727584, "loss": 20.7233, "step": 15500 }, { "epoch": 3.71, "learning_rate": 0.0002777970813064628, "loss": 20.5867, "step": 16000 }, { "epoch": 3.82, "learning_rate": 0.00028648366921473243, "loss": 20.4368, "step": 16500 }, { "epoch": 3.94, "learning_rate": 0.00029517025712300207, "loss": 20.3374, "step": 17000 }, { "epoch": 4.05, "learning_rate": 0.000299571461663192, "loss": 20.2117, "step": 17500 }, { "epoch": 4.17, "learning_rate": 0.00029860628522893984, "loss": 20.0622, "step": 18000 }, { "epoch": 4.29, "learning_rate": 0.00029764110879468763, "loss": 19.9614, "step": 18500 }, { "epoch": 4.4, "learning_rate": 0.0002966759323604355, "loss": 19.8524, "step": 19000 }, { "epoch": 4.52, "learning_rate": 0.0002957107559261833, "loss": 19.7578, "step": 19500 }, { "epoch": 4.63, "learning_rate": 0.00029474557949193107, "loss": 19.6587, "step": 20000 }, { "epoch": 4.75, "learning_rate": 0.0002937804030576789, "loss": 19.5385, "step": 20500 }, { "epoch": 4.86, "learning_rate": 0.0002928152266234267, "loss": 19.4285, "step": 21000 }, { "epoch": 4.98, "learning_rate": 0.00029185005018917456, "loss": 19.3376, "step": 21500 }, { "epoch": 5.1, "learning_rate": 0.0002908848737549224, "loss": 19.2065, "step": 22000 }, { "epoch": 5.21, "learning_rate": 0.0002899196973206702, "loss": 19.1089, "step": 22500 }, { "epoch": 5.33, "learning_rate": 0.000288954520886418, "loss": 19.0411, "step": 23000 }, { "epoch": 5.44, "learning_rate": 0.0002879893444521658, "loss": 18.9864, "step": 23500 }, { "epoch": 5.56, "learning_rate": 0.00028702416801791364, "loss": 18.9157, "step": 24000 }, { "epoch": 5.68, "learning_rate": 0.0002860589915836615, "loss": 18.8606, "step": 24500 }, { "epoch": 5.79, "learning_rate": 0.0002850938151494093, "loss": 18.8049, "step": 25000 }, { "epoch": 5.91, "learning_rate": 0.0002841286387151571, "loss": 18.7415, "step": 25500 }, { "epoch": 6.02, "learning_rate": 0.0002831634622809049, "loss": 18.695, "step": 26000 }, { "epoch": 6.14, "learning_rate": 0.0002821982858466527, "loss": 18.5979, "step": 26500 }, { "epoch": 6.25, "learning_rate": 0.00028123310941240056, "loss": 18.5566, "step": 27000 }, { "epoch": 6.37, "learning_rate": 0.0002802698633310169, "loss": 18.5145, "step": 27500 }, { "epoch": 6.49, "learning_rate": 0.0002793046868967647, "loss": 18.465, "step": 28000 }, { "epoch": 6.6, "learning_rate": 0.0002783395104625125, "loss": 18.4232, "step": 28500 }, { "epoch": 6.72, "learning_rate": 0.00027737433402826034, "loss": 18.368, "step": 29000 }, { "epoch": 6.83, "learning_rate": 0.00027640915759400813, "loss": 18.315, "step": 29500 }, { "epoch": 6.95, "learning_rate": 0.000275443981159756, "loss": 18.2882, "step": 30000 }, { "epoch": 7.07, "learning_rate": 0.00027447880472550383, "loss": 18.2239, "step": 30500 }, { "epoch": 7.18, "learning_rate": 0.0002735136282912516, "loss": 18.163, "step": 31000 }, { "epoch": 7.3, "learning_rate": 0.0002725503822098679, "loss": 18.1283, "step": 31500 }, { "epoch": 7.41, "learning_rate": 0.00027158520577561575, "loss": 18.1016, "step": 32000 }, { "epoch": 7.53, "learning_rate": 0.0002706200293413636, "loss": 18.0663, "step": 32500 }, { "epoch": 7.64, "learning_rate": 0.0002696567832599799, "loss": 18.0263, "step": 33000 }, { "epoch": 7.76, "learning_rate": 0.00026869160682572773, "loss": 17.9946, "step": 33500 }, { "epoch": 7.88, "learning_rate": 0.00026772643039147553, "loss": 17.9719, "step": 34000 }, { "epoch": 7.99, "learning_rate": 0.0002667612539572234, "loss": 17.931, "step": 34500 }, { "epoch": 8.11, "learning_rate": 0.00026579607752297117, "loss": 17.8646, "step": 35000 }, { "epoch": 8.22, "learning_rate": 0.000264830901088719, "loss": 17.8358, "step": 35500 }, { "epoch": 8.34, "learning_rate": 0.0002638676550073353, "loss": 17.8081, "step": 36000 }, { "epoch": 8.45, "learning_rate": 0.00026290247857308315, "loss": 17.7928, "step": 36500 }, { "epoch": 8.57, "learning_rate": 0.00026193730213883095, "loss": 17.7663, "step": 37000 }, { "epoch": 8.69, "learning_rate": 0.0002609721257045788, "loss": 17.7224, "step": 37500 }, { "epoch": 8.8, "learning_rate": 0.0002600069492703266, "loss": 17.7134, "step": 38000 }, { "epoch": 8.92, "learning_rate": 0.0002590417728360744, "loss": 17.6934, "step": 38500 }, { "epoch": 9.03, "learning_rate": 0.00025807659640182223, "loss": 17.662, "step": 39000 }, { "epoch": 9.15, "learning_rate": 0.0002571114199675701, "loss": 17.5886, "step": 39500 }, { "epoch": 9.27, "learning_rate": 0.00025615010423905485, "loss": 17.5854, "step": 40000 }, { "epoch": 9.38, "learning_rate": 0.0002551849278048027, "loss": 17.5689, "step": 40500 }, { "epoch": 9.5, "learning_rate": 0.00025421975137055055, "loss": 17.5545, "step": 41000 }, { "epoch": 9.61, "learning_rate": 0.00025325650528916683, "loss": 17.5269, "step": 41500 }, { "epoch": 9.73, "learning_rate": 0.00025229132885491463, "loss": 17.5051, "step": 42000 }, { "epoch": 9.84, "learning_rate": 0.0002513261524206625, "loss": 17.4922, "step": 42500 }, { "epoch": 9.96, "learning_rate": 0.0002503609759864103, "loss": 17.4682, "step": 43000 }, { "epoch": 10.08, "learning_rate": 0.0002493957995521581, "loss": 17.4151, "step": 43500 }, { "epoch": 10.19, "learning_rate": 0.0002484306231179059, "loss": 17.3825, "step": 44000 }, { "epoch": 10.31, "learning_rate": 0.00024746544668365376, "loss": 17.3681, "step": 44500 }, { "epoch": 10.42, "learning_rate": 0.00024650027024940155, "loss": 17.341, "step": 45000 }, { "epoch": 10.54, "learning_rate": 0.0002455350938151494, "loss": 17.3344, "step": 45500 }, { "epoch": 10.66, "learning_rate": 0.0002445699173808972, "loss": 17.316, "step": 46000 }, { "epoch": 10.77, "learning_rate": 0.00024360474094664502, "loss": 17.3002, "step": 46500 }, { "epoch": 10.89, "learning_rate": 0.00024264149486526136, "loss": 17.279, "step": 47000 }, { "epoch": 11.0, "learning_rate": 0.00024167631843100918, "loss": 17.2714, "step": 47500 }, { "epoch": 11.12, "learning_rate": 0.00024071114199675697, "loss": 17.2032, "step": 48000 }, { "epoch": 11.23, "learning_rate": 0.00023974596556250482, "loss": 17.1982, "step": 48500 }, { "epoch": 11.35, "learning_rate": 0.00023878271948112113, "loss": 17.1764, "step": 49000 }, { "epoch": 11.47, "learning_rate": 0.00023781754304686895, "loss": 17.1697, "step": 49500 }, { "epoch": 11.58, "learning_rate": 0.00023685236661261674, "loss": 17.1619, "step": 50000 }, { "epoch": 11.7, "learning_rate": 0.0002358871901783646, "loss": 17.1475, "step": 50500 }, { "epoch": 11.81, "learning_rate": 0.0002349239440969809, "loss": 17.1354, "step": 51000 }, { "epoch": 11.93, "learning_rate": 0.00023395876766272873, "loss": 17.1193, "step": 51500 }, { "epoch": 12.05, "learning_rate": 0.00023299359122847657, "loss": 17.0976, "step": 52000 }, { "epoch": 12.16, "learning_rate": 0.00023203034514709289, "loss": 17.0559, "step": 52500 }, { "epoch": 12.28, "learning_rate": 0.00023106516871284068, "loss": 17.0478, "step": 53000 }, { "epoch": 12.39, "learning_rate": 0.0002300999922785885, "loss": 17.0457, "step": 53500 }, { "epoch": 12.51, "learning_rate": 0.00022913481584433635, "loss": 17.0296, "step": 54000 }, { "epoch": 12.62, "learning_rate": 0.00022816963941008414, "loss": 17.0185, "step": 54500 }, { "epoch": 12.74, "learning_rate": 0.00022720639332870045, "loss": 17.012, "step": 55000 }, { "epoch": 12.86, "learning_rate": 0.0002262431472473168, "loss": 16.9982, "step": 55500 }, { "epoch": 12.97, "learning_rate": 0.00022527990116593313, "loss": 16.981, "step": 56000 }, { "epoch": 13.09, "learning_rate": 0.00022431472473168093, "loss": 16.9492, "step": 56500 }, { "epoch": 13.2, "learning_rate": 0.00022335147865029726, "loss": 16.9352, "step": 57000 }, { "epoch": 13.32, "learning_rate": 0.00022238630221604506, "loss": 16.9178, "step": 57500 }, { "epoch": 13.44, "learning_rate": 0.0002214211257817929, "loss": 16.9156, "step": 58000 }, { "epoch": 13.55, "learning_rate": 0.00022045787970040922, "loss": 16.9093, "step": 58500 }, { "epoch": 13.67, "learning_rate": 0.00021949270326615704, "loss": 16.9013, "step": 59000 }, { "epoch": 13.78, "learning_rate": 0.00021852752683190483, "loss": 16.8895, "step": 59500 }, { "epoch": 13.9, "learning_rate": 0.00021756235039765268, "loss": 16.8733, "step": 60000 }, { "epoch": 14.01, "learning_rate": 0.0002165971739634005, "loss": 16.881, "step": 60500 }, { "epoch": 14.13, "learning_rate": 0.0002156319975291483, "loss": 16.8109, "step": 61000 }, { "epoch": 14.25, "learning_rate": 0.00021466682109489614, "loss": 16.8187, "step": 61500 }, { "epoch": 14.36, "learning_rate": 0.00021370164466064394, "loss": 16.8078, "step": 62000 }, { "epoch": 14.48, "learning_rate": 0.00021273646822639176, "loss": 16.8041, "step": 62500 }, { "epoch": 14.59, "learning_rate": 0.0002117712917921396, "loss": 16.7933, "step": 63000 }, { "epoch": 14.71, "learning_rate": 0.0002108061153578874, "loss": 16.7964, "step": 63500 }, { "epoch": 14.83, "learning_rate": 0.00020984093892363522, "loss": 16.778, "step": 64000 }, { "epoch": 14.94, "learning_rate": 0.00020887576248938302, "loss": 16.7764, "step": 64500 }, { "epoch": 15.06, "learning_rate": 0.00020791058605513086, "loss": 16.7603, "step": 65000 }, { "epoch": 15.17, "learning_rate": 0.00020694540962087868, "loss": 16.7251, "step": 65500 }, { "epoch": 15.29, "learning_rate": 0.00020598023318662648, "loss": 16.712, "step": 66000 }, { "epoch": 15.4, "learning_rate": 0.00020501505675237433, "loss": 16.7072, "step": 66500 }, { "epoch": 15.52, "learning_rate": 0.00020404988031812215, "loss": 16.7106, "step": 67000 }, { "epoch": 15.64, "learning_rate": 0.00020308470388386994, "loss": 16.7037, "step": 67500 }, { "epoch": 15.75, "learning_rate": 0.0002021195274496178, "loss": 16.6918, "step": 68000 }, { "epoch": 15.87, "learning_rate": 0.00020115435101536558, "loss": 16.6886, "step": 68500 }, { "epoch": 15.98, "learning_rate": 0.0002001891745811134, "loss": 16.6947, "step": 69000 }, { "epoch": 16.1, "learning_rate": 0.00019922399814686125, "loss": 16.6516, "step": 69500 }, { "epoch": 16.21, "learning_rate": 0.00019825882171260905, "loss": 16.6403, "step": 70000 }, { "epoch": 16.33, "learning_rate": 0.00019729364527835687, "loss": 16.6343, "step": 70500 }, { "epoch": 16.45, "learning_rate": 0.00019632846884410466, "loss": 16.6257, "step": 71000 }, { "epoch": 16.56, "learning_rate": 0.00019536522276272103, "loss": 16.6181, "step": 71500 }, { "epoch": 16.68, "learning_rate": 0.00019440004632846882, "loss": 16.6204, "step": 72000 }, { "epoch": 16.79, "learning_rate": 0.00019343486989421664, "loss": 16.6101, "step": 72500 }, { "epoch": 16.91, "learning_rate": 0.00019247162381283295, "loss": 16.6116, "step": 73000 }, { "epoch": 17.03, "learning_rate": 0.0001915083777314493, "loss": 16.6047, "step": 73500 }, { "epoch": 17.14, "learning_rate": 0.0001905432012971971, "loss": 16.5427, "step": 74000 }, { "epoch": 17.26, "learning_rate": 0.00018957802486294493, "loss": 16.5453, "step": 74500 }, { "epoch": 17.37, "learning_rate": 0.00018861284842869276, "loss": 16.5489, "step": 75000 }, { "epoch": 17.49, "learning_rate": 0.00018764960234730907, "loss": 16.5505, "step": 75500 }, { "epoch": 17.6, "learning_rate": 0.0001866844259130569, "loss": 16.5423, "step": 76000 }, { "epoch": 17.72, "learning_rate": 0.0001857192494788047, "loss": 16.543, "step": 76500 }, { "epoch": 17.84, "learning_rate": 0.00018475407304455253, "loss": 16.5402, "step": 77000 }, { "epoch": 17.95, "learning_rate": 0.00018378889661030035, "loss": 16.5373, "step": 77500 }, { "epoch": 18.07, "learning_rate": 0.00018282372017604814, "loss": 16.5045, "step": 78000 }, { "epoch": 18.18, "learning_rate": 0.000181858543741796, "loss": 16.4782, "step": 78500 }, { "epoch": 18.3, "learning_rate": 0.00018089336730754381, "loss": 16.4817, "step": 79000 }, { "epoch": 18.42, "learning_rate": 0.0001799281908732916, "loss": 16.4737, "step": 79500 }, { "epoch": 18.53, "learning_rate": 0.00017896494479190797, "loss": 16.4797, "step": 80000 }, { "epoch": 18.65, "learning_rate": 0.00017799976835765577, "loss": 16.4755, "step": 80500 }, { "epoch": 18.76, "learning_rate": 0.0001770345919234036, "loss": 16.464, "step": 81000 }, { "epoch": 18.88, "learning_rate": 0.00017606941548915138, "loss": 16.4599, "step": 81500 }, { "epoch": 18.99, "learning_rate": 0.00017510423905489923, "loss": 16.4511, "step": 82000 }, { "epoch": 19.11, "learning_rate": 0.00017413906262064705, "loss": 16.42, "step": 82500 }, { "epoch": 19.23, "learning_rate": 0.00017317581653926336, "loss": 16.4039, "step": 83000 }, { "epoch": 19.34, "learning_rate": 0.00017221064010501116, "loss": 16.4083, "step": 83500 }, { "epoch": 19.46, "learning_rate": 0.000171245463670759, "loss": 16.413, "step": 84000 }, { "epoch": 19.57, "learning_rate": 0.00017028028723650683, "loss": 16.4069, "step": 84500 }, { "epoch": 19.69, "learning_rate": 0.00016931511080225462, "loss": 16.3972, "step": 85000 }, { "epoch": 19.81, "learning_rate": 0.00016834993436800247, "loss": 16.4007, "step": 85500 }, { "epoch": 19.92, "learning_rate": 0.0001673847579337503, "loss": 16.3945, "step": 86000 }, { "epoch": 20.04, "learning_rate": 0.00016641958149949808, "loss": 16.3928, "step": 86500 }, { "epoch": 20.15, "learning_rate": 0.00016545440506524593, "loss": 16.3468, "step": 87000 }, { "epoch": 20.27, "learning_rate": 0.00016448922863099373, "loss": 16.3459, "step": 87500 }, { "epoch": 20.38, "learning_rate": 0.00016352598254961006, "loss": 16.3429, "step": 88000 }, { "epoch": 20.5, "learning_rate": 0.00016256080611535786, "loss": 16.3394, "step": 88500 }, { "epoch": 20.62, "learning_rate": 0.0001615956296811057, "loss": 16.3417, "step": 89000 }, { "epoch": 20.73, "learning_rate": 0.0001606304532468535, "loss": 16.3369, "step": 89500 }, { "epoch": 20.85, "learning_rate": 0.00015966527681260132, "loss": 16.3392, "step": 90000 }, { "epoch": 20.96, "learning_rate": 0.00015870203073121763, "loss": 16.3402, "step": 90500 }, { "epoch": 21.08, "learning_rate": 0.00015773685429696548, "loss": 16.3009, "step": 91000 }, { "epoch": 21.2, "learning_rate": 0.00015677167786271327, "loss": 16.2924, "step": 91500 }, { "epoch": 21.31, "learning_rate": 0.0001558065014284611, "loss": 16.2908, "step": 92000 }, { "epoch": 21.43, "learning_rate": 0.00015484132499420894, "loss": 16.289, "step": 92500 }, { "epoch": 21.54, "learning_rate": 0.00015387614855995674, "loss": 16.2874, "step": 93000 }, { "epoch": 21.66, "learning_rate": 0.00015291097212570456, "loss": 16.2869, "step": 93500 }, { "epoch": 21.77, "learning_rate": 0.00015194772604432087, "loss": 16.2867, "step": 94000 }, { "epoch": 21.89, "learning_rate": 0.00015098254961006872, "loss": 16.2779, "step": 94500 }, { "epoch": 22.01, "learning_rate": 0.00015002123388155355, "loss": 16.2885, "step": 95000 }, { "epoch": 22.12, "learning_rate": 0.00014905605744730134, "loss": 16.2251, "step": 95500 }, { "epoch": 22.24, "learning_rate": 0.0001480908810130492, "loss": 16.247, "step": 96000 }, { "epoch": 22.35, "learning_rate": 0.00014712570457879698, "loss": 16.2315, "step": 96500 }, { "epoch": 22.47, "learning_rate": 0.0001461605281445448, "loss": 16.2464, "step": 97000 }, { "epoch": 22.59, "learning_rate": 0.00014519535171029263, "loss": 16.2326, "step": 97500 }, { "epoch": 22.7, "learning_rate": 0.00014423017527604045, "loss": 16.2327, "step": 98000 }, { "epoch": 22.82, "learning_rate": 0.00014326499884178827, "loss": 16.2269, "step": 98500 }, { "epoch": 22.93, "learning_rate": 0.0001422998224075361, "loss": 16.2186, "step": 99000 }, { "epoch": 23.05, "learning_rate": 0.0001413346459732839, "loss": 16.2302, "step": 99500 }, { "epoch": 23.16, "learning_rate": 0.0001403694695390317, "loss": 16.1879, "step": 100000 }, { "epoch": 23.28, "learning_rate": 0.00013940622345764804, "loss": 16.1834, "step": 100500 }, { "epoch": 23.4, "learning_rate": 0.00013844104702339586, "loss": 16.1779, "step": 101000 }, { "epoch": 23.51, "learning_rate": 0.00013747587058914368, "loss": 16.1835, "step": 101500 }, { "epoch": 23.63, "learning_rate": 0.00013651455486062851, "loss": 16.1829, "step": 102000 }, { "epoch": 23.74, "learning_rate": 0.00013554937842637633, "loss": 16.1814, "step": 102500 }, { "epoch": 23.86, "learning_rate": 0.00013458420199212413, "loss": 16.1864, "step": 103000 }, { "epoch": 23.97, "learning_rate": 0.00013361902555787198, "loss": 16.1784, "step": 103500 }, { "epoch": 24.09, "learning_rate": 0.0001326538491236198, "loss": 16.149, "step": 104000 }, { "epoch": 24.21, "learning_rate": 0.0001316886726893676, "loss": 16.1459, "step": 104500 }, { "epoch": 24.32, "learning_rate": 0.0001307234962551154, "loss": 16.1374, "step": 105000 }, { "epoch": 24.44, "learning_rate": 0.00012975831982086326, "loss": 16.143, "step": 105500 }, { "epoch": 24.55, "learning_rate": 0.00012879314338661105, "loss": 16.1267, "step": 106000 }, { "epoch": 24.67, "learning_rate": 0.00012782796695235888, "loss": 16.1363, "step": 106500 }, { "epoch": 24.79, "learning_rate": 0.0001268666512238437, "loss": 16.1328, "step": 107000 }, { "epoch": 24.9, "learning_rate": 0.00012590147478959153, "loss": 16.1314, "step": 107500 }, { "epoch": 25.02, "learning_rate": 0.00012493629835533935, "loss": 16.1427, "step": 108000 }, { "epoch": 25.13, "learning_rate": 0.00012397112192108717, "loss": 16.0975, "step": 108500 }, { "epoch": 25.25, "learning_rate": 0.000123005945486835, "loss": 16.091, "step": 109000 }, { "epoch": 25.36, "learning_rate": 0.00012204076905258281, "loss": 16.0959, "step": 109500 }, { "epoch": 25.48, "learning_rate": 0.00012107559261833062, "loss": 16.0919, "step": 110000 }, { "epoch": 25.6, "learning_rate": 0.00012011041618407844, "loss": 16.077, "step": 110500 }, { "epoch": 25.71, "learning_rate": 0.00011914523974982626, "loss": 16.0942, "step": 111000 }, { "epoch": 25.83, "learning_rate": 0.00011818006331557408, "loss": 16.0786, "step": 111500 }, { "epoch": 25.94, "learning_rate": 0.00011721681723419039, "loss": 16.0852, "step": 112000 }, { "epoch": 26.06, "learning_rate": 0.00011625164079993821, "loss": 16.0732, "step": 112500 }, { "epoch": 26.18, "learning_rate": 0.00011528646436568605, "loss": 16.0445, "step": 113000 }, { "epoch": 26.29, "learning_rate": 0.00011432128793143386, "loss": 16.043, "step": 113500 }, { "epoch": 26.41, "learning_rate": 0.00011335611149718168, "loss": 16.0484, "step": 114000 }, { "epoch": 26.52, "learning_rate": 0.00011239093506292948, "loss": 16.0494, "step": 114500 }, { "epoch": 26.64, "learning_rate": 0.00011142575862867732, "loss": 16.043, "step": 115000 }, { "epoch": 26.75, "learning_rate": 0.00011046058219442514, "loss": 16.0415, "step": 115500 }, { "epoch": 26.87, "learning_rate": 0.00010949540576017295, "loss": 16.032, "step": 116000 }, { "epoch": 26.99, "learning_rate": 0.00010853022932592077, "loss": 16.0346, "step": 116500 }, { "epoch": 27.1, "learning_rate": 0.00010756505289166857, "loss": 16.0145, "step": 117000 }, { "epoch": 27.22, "learning_rate": 0.00010659987645741641, "loss": 16.0032, "step": 117500 }, { "epoch": 27.33, "learning_rate": 0.00010563470002316423, "loss": 15.9992, "step": 118000 }, { "epoch": 27.45, "learning_rate": 0.00010467145394178054, "loss": 16.0033, "step": 118500 }, { "epoch": 27.57, "learning_rate": 0.00010370627750752838, "loss": 15.9964, "step": 119000 }, { "epoch": 27.68, "learning_rate": 0.00010274303142614469, "loss": 15.9994, "step": 119500 }, { "epoch": 27.8, "learning_rate": 0.00010177785499189251, "loss": 16.0, "step": 120000 } ], "max_steps": 172680, "num_train_epochs": 40, "total_flos": 0.0, "trial_name": null, "trial_params": null }