{ "best_metric": 0.5068374276161194, "best_model_checkpoint": "/home/datta0/models/lora_final/Meta-Llama-3-8B_metamath_default/checkpoint-611", "epoch": 0.9898744430943702, "eval_steps": 13, "global_step": 611, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0016200891049007696, "grad_norm": 21.83199119567871, "learning_rate": 2.3076923076923076e-05, "loss": 1.2178, "step": 1 }, { "epoch": 0.011340623734305387, "grad_norm": 4.216019153594971, "learning_rate": 0.00016153846153846153, "loss": 0.8666, "step": 7 }, { "epoch": 0.021061158363710003, "eval_loss": 0.7509176135063171, "eval_runtime": 12.1244, "eval_samples_per_second": 41.239, "eval_steps_per_second": 5.196, "step": 13 }, { "epoch": 0.022681247468610773, "grad_norm": 4.796965599060059, "learning_rate": 0.0002999979709808197, "loss": 0.6998, "step": 14 }, { "epoch": 0.03402187120291616, "grad_norm": 5.564920425415039, "learning_rate": 0.0002998701612152596, "loss": 0.6952, "step": 21 }, { "epoch": 0.042122316727420006, "eval_loss": 0.736819326877594, "eval_runtime": 102.3178, "eval_samples_per_second": 4.887, "eval_steps_per_second": 0.616, "step": 26 }, { "epoch": 0.04536249493722155, "grad_norm": 7.243711948394775, "learning_rate": 0.0002995437011859465, "loss": 0.7117, "step": 28 }, { "epoch": 0.056703118671526935, "grad_norm": 5.288980960845947, "learning_rate": 0.00029901902360990936, "loss": 0.7079, "step": 35 }, { "epoch": 0.06318347509113001, "eval_loss": 0.7195977568626404, "eval_runtime": 102.5692, "eval_samples_per_second": 4.875, "eval_steps_per_second": 0.614, "step": 39 }, { "epoch": 0.06804374240583232, "grad_norm": 5.120002746582031, "learning_rate": 0.00029829682393805085, "loss": 0.6827, "step": 42 }, { "epoch": 0.0793843661401377, "grad_norm": 8.668340682983398, "learning_rate": 0.0002973780594333385, "loss": 0.6922, "step": 49 }, { "epoch": 0.08424463345484001, "eval_loss": 0.7065702080726624, "eval_runtime": 12.1388, "eval_samples_per_second": 41.19, "eval_steps_per_second": 5.19, "step": 52 }, { "epoch": 0.0907249898744431, "grad_norm": 4.637055397033691, "learning_rate": 0.00029626394790197025, "loss": 0.6824, "step": 56 }, { "epoch": 0.10206561360874848, "grad_norm": 5.630373477935791, "learning_rate": 0.00029495596607919305, "loss": 0.6565, "step": 63 }, { "epoch": 0.10530579181855002, "eval_loss": 0.7073802351951599, "eval_runtime": 103.2813, "eval_samples_per_second": 4.841, "eval_steps_per_second": 0.61, "step": 65 }, { "epoch": 0.11340623734305387, "grad_norm": 6.744913578033447, "learning_rate": 0.00029345584767191685, "loss": 0.6785, "step": 70 }, { "epoch": 0.12474686107735926, "grad_norm": 5.90676736831665, "learning_rate": 0.0002917655810607161, "loss": 0.6791, "step": 77 }, { "epoch": 0.12636695018226002, "eval_loss": 0.7263020873069763, "eval_runtime": 93.1665, "eval_samples_per_second": 5.367, "eval_steps_per_second": 0.676, "step": 78 }, { "epoch": 0.13608748481166463, "grad_norm": 7.519783020019531, "learning_rate": 0.0002898874066642667, "loss": 0.6764, "step": 84 }, { "epoch": 0.14742810854597002, "grad_norm": 7.202000617980957, "learning_rate": 0.00028782381396971003, "loss": 0.6858, "step": 91 }, { "epoch": 0.14742810854597002, "eval_loss": 0.7018572092056274, "eval_runtime": 12.1782, "eval_samples_per_second": 41.057, "eval_steps_per_second": 5.173, "step": 91 }, { "epoch": 0.1587687322802754, "grad_norm": 5.747766971588135, "learning_rate": 0.00028557753823288173, "loss": 0.6693, "step": 98 }, { "epoch": 0.16848926690968002, "eval_loss": 0.6925504207611084, "eval_runtime": 108.0515, "eval_samples_per_second": 4.627, "eval_steps_per_second": 0.583, "step": 104 }, { "epoch": 0.1701093560145808, "grad_norm": 5.340470790863037, "learning_rate": 0.0002831515568527781, "loss": 0.6859, "step": 105 }, { "epoch": 0.1814499797488862, "grad_norm": 6.736198902130127, "learning_rate": 0.00028054908542506627, "loss": 0.6503, "step": 112 }, { "epoch": 0.18955042527339003, "eval_loss": 0.6922364234924316, "eval_runtime": 96.4494, "eval_samples_per_second": 5.184, "eval_steps_per_second": 0.653, "step": 117 }, { "epoch": 0.19279060348319157, "grad_norm": 5.258207321166992, "learning_rate": 0.00027777357347986823, "loss": 0.6626, "step": 119 }, { "epoch": 0.20413122721749696, "grad_norm": 7.422248840332031, "learning_rate": 0.00027482869990946986, "loss": 0.6488, "step": 126 }, { "epoch": 0.21061158363710003, "eval_loss": 0.692547619342804, "eval_runtime": 12.1559, "eval_samples_per_second": 41.132, "eval_steps_per_second": 5.183, "step": 130 }, { "epoch": 0.21547185095180235, "grad_norm": 4.835718631744385, "learning_rate": 0.0002717183680920135, "loss": 0.641, "step": 133 }, { "epoch": 0.22681247468610774, "grad_norm": 6.634873390197754, "learning_rate": 0.00026844670071763906, "loss": 0.6505, "step": 140 }, { "epoch": 0.23167274200081003, "eval_loss": 0.6843672394752502, "eval_runtime": 110.1028, "eval_samples_per_second": 4.541, "eval_steps_per_second": 0.572, "step": 143 }, { "epoch": 0.23815309842041313, "grad_norm": 5.616636276245117, "learning_rate": 0.00026501803432393037, "loss": 0.6686, "step": 147 }, { "epoch": 0.24949372215471852, "grad_norm": 4.444299221038818, "learning_rate": 0.00026143691354791145, "loss": 0.6533, "step": 154 }, { "epoch": 0.25273390036452004, "eval_loss": 0.6842171549797058, "eval_runtime": 104.1575, "eval_samples_per_second": 4.8, "eval_steps_per_second": 0.605, "step": 156 }, { "epoch": 0.2608343458890239, "grad_norm": 4.952290058135986, "learning_rate": 0.00025770808510220956, "loss": 0.6518, "step": 161 }, { "epoch": 0.27217496962332927, "grad_norm": 5.576895236968994, "learning_rate": 0.00025383649148337105, "loss": 0.6505, "step": 168 }, { "epoch": 0.27379505872823007, "eval_loss": 0.6708831787109375, "eval_runtime": 12.1814, "eval_samples_per_second": 41.046, "eval_steps_per_second": 5.172, "step": 169 }, { "epoch": 0.28351559335763465, "grad_norm": 4.597885608673096, "learning_rate": 0.0002498272644206695, "loss": 0.6479, "step": 175 }, { "epoch": 0.29485621709194004, "grad_norm": 4.833500862121582, "learning_rate": 0.0002456857180740884, "loss": 0.6456, "step": 182 }, { "epoch": 0.29485621709194004, "eval_loss": 0.6661036014556885, "eval_runtime": 108.3108, "eval_samples_per_second": 4.616, "eval_steps_per_second": 0.582, "step": 182 }, { "epoch": 0.30619684082624543, "grad_norm": 5.517384052276611, "learning_rate": 0.0002414173419904956, "loss": 0.6307, "step": 189 }, { "epoch": 0.3159173754556501, "eval_loss": 0.6699193120002747, "eval_runtime": 105.864, "eval_samples_per_second": 4.723, "eval_steps_per_second": 0.595, "step": 195 }, { "epoch": 0.3175374645605508, "grad_norm": 5.475388526916504, "learning_rate": 0.00023702779382734566, "loss": 0.6403, "step": 196 }, { "epoch": 0.3288780882948562, "grad_norm": 6.005538463592529, "learning_rate": 0.0002325228918535541, "loss": 0.6144, "step": 203 }, { "epoch": 0.33697853381936005, "eval_loss": 0.6628842949867249, "eval_runtime": 12.1654, "eval_samples_per_second": 41.1, "eval_steps_per_second": 5.179, "step": 208 }, { "epoch": 0.3402187120291616, "grad_norm": 5.392065048217773, "learning_rate": 0.00022790860723748442, "loss": 0.6437, "step": 210 }, { "epoch": 0.351559335763467, "grad_norm": 4.369307041168213, "learning_rate": 0.00022319105613226921, "loss": 0.6286, "step": 217 }, { "epoch": 0.3580396921830701, "eval_loss": 0.6546506285667419, "eval_runtime": 106.5209, "eval_samples_per_second": 4.694, "eval_steps_per_second": 0.591, "step": 221 }, { "epoch": 0.3628999594977724, "grad_norm": 5.212913990020752, "learning_rate": 0.00021837649156895706, "loss": 0.6325, "step": 224 }, { "epoch": 0.37424058323207776, "grad_norm": 4.210490703582764, "learning_rate": 0.00021347129516822945, "loss": 0.6261, "step": 231 }, { "epoch": 0.37910085054678005, "eval_loss": 0.6468992829322815, "eval_runtime": 108.4304, "eval_samples_per_second": 4.611, "eval_steps_per_second": 0.581, "step": 234 }, { "epoch": 0.38558120696638315, "grad_norm": 4.121553421020508, "learning_rate": 0.00020848196868167505, "loss": 0.6326, "step": 238 }, { "epoch": 0.39692183070068854, "grad_norm": 4.0263142585754395, "learning_rate": 0.000203415125373832, "loss": 0.6365, "step": 245 }, { "epoch": 0.4001620089104901, "eval_loss": 0.6482229232788086, "eval_runtime": 12.1869, "eval_samples_per_second": 41.028, "eval_steps_per_second": 5.169, "step": 247 }, { "epoch": 0.4082624544349939, "grad_norm": 4.285489559173584, "learning_rate": 0.00019827748125642242, "loss": 0.6276, "step": 252 }, { "epoch": 0.4196030781692993, "grad_norm": 4.923107147216797, "learning_rate": 0.0001930758461863965, "loss": 0.6108, "step": 259 }, { "epoch": 0.42122316727420006, "eval_loss": 0.6427775621414185, "eval_runtime": 102.1449, "eval_samples_per_second": 4.895, "eval_steps_per_second": 0.617, "step": 260 }, { "epoch": 0.4309437019036047, "grad_norm": 5.982186317443848, "learning_rate": 0.0001878171148395872, "loss": 0.6149, "step": 266 }, { "epoch": 0.4422843256379101, "grad_norm": 3.5820586681365967, "learning_rate": 0.00018250825757193848, "loss": 0.6207, "step": 273 }, { "epoch": 0.4422843256379101, "eval_loss": 0.6321972608566284, "eval_runtime": 102.3465, "eval_samples_per_second": 4.885, "eval_steps_per_second": 0.616, "step": 273 }, { "epoch": 0.4536249493722155, "grad_norm": 4.147037029266357, "learning_rate": 0.0001771563111804211, "loss": 0.6219, "step": 280 }, { "epoch": 0.46334548400162007, "eval_loss": 0.6264978051185608, "eval_runtime": 12.203, "eval_samples_per_second": 40.974, "eval_steps_per_second": 5.163, "step": 286 }, { "epoch": 0.46496557310652087, "grad_norm": 3.318942070007324, "learning_rate": 0.0001717683695758819, "loss": 0.602, "step": 287 }, { "epoch": 0.47630619684082626, "grad_norm": 3.9704108238220215, "learning_rate": 0.00016635157438018983, "loss": 0.6133, "step": 294 }, { "epoch": 0.4844066423653301, "eval_loss": 0.6212669610977173, "eval_runtime": 96.1686, "eval_samples_per_second": 5.199, "eval_steps_per_second": 0.655, "step": 299 }, { "epoch": 0.48764682057513165, "grad_norm": 4.209397792816162, "learning_rate": 0.0001609131054601416, "loss": 0.6142, "step": 301 }, { "epoch": 0.49898744430943703, "grad_norm": 3.506843328475952, "learning_rate": 0.00015546017141067432, "loss": 0.5944, "step": 308 }, { "epoch": 0.5054678007290401, "eval_loss": 0.6138330101966858, "eval_runtime": 96.8265, "eval_samples_per_second": 5.164, "eval_steps_per_second": 0.651, "step": 312 }, { "epoch": 0.5103280680437424, "grad_norm": 3.397951364517212, "learning_rate": 0.00015, "loss": 0.6082, "step": 315 }, { "epoch": 0.5216686917780478, "grad_norm": 3.766294002532959, "learning_rate": 0.0001445398285893257, "loss": 0.5871, "step": 322 }, { "epoch": 0.5265289590927501, "eval_loss": 0.6034436225891113, "eval_runtime": 12.2193, "eval_samples_per_second": 40.919, "eval_steps_per_second": 5.156, "step": 325 }, { "epoch": 0.5330093155123532, "grad_norm": 3.9341065883636475, "learning_rate": 0.0001390868945398584, "loss": 0.588, "step": 329 }, { "epoch": 0.5443499392466585, "grad_norm": 3.7946388721466064, "learning_rate": 0.00013364842561981014, "loss": 0.5827, "step": 336 }, { "epoch": 0.5475901174564601, "eval_loss": 0.6012772917747498, "eval_runtime": 100.8024, "eval_samples_per_second": 4.96, "eval_steps_per_second": 0.625, "step": 338 }, { "epoch": 0.555690562980964, "grad_norm": 2.895064353942871, "learning_rate": 0.00012823163042411807, "loss": 0.5728, "step": 343 }, { "epoch": 0.5670311867152693, "grad_norm": 3.9632740020751953, "learning_rate": 0.0001228436888195789, "loss": 0.5714, "step": 350 }, { "epoch": 0.56865127582017, "eval_loss": 0.5922607183456421, "eval_runtime": 95.7391, "eval_samples_per_second": 5.223, "eval_steps_per_second": 0.658, "step": 351 }, { "epoch": 0.5783718104495748, "grad_norm": 3.257824420928955, "learning_rate": 0.00011749174242806152, "loss": 0.5824, "step": 357 }, { "epoch": 0.5897124341838801, "grad_norm": 4.506229877471924, "learning_rate": 0.00011218288516041279, "loss": 0.5512, "step": 364 }, { "epoch": 0.5897124341838801, "eval_loss": 0.5848901867866516, "eval_runtime": 12.2285, "eval_samples_per_second": 40.888, "eval_steps_per_second": 5.152, "step": 364 }, { "epoch": 0.6010530579181855, "grad_norm": 3.2441561222076416, "learning_rate": 0.00010692415381360349, "loss": 0.5636, "step": 371 }, { "epoch": 0.6107735925475901, "eval_loss": 0.5755201578140259, "eval_runtime": 106.0764, "eval_samples_per_second": 4.714, "eval_steps_per_second": 0.594, "step": 377 }, { "epoch": 0.6123936816524909, "grad_norm": 3.45365834236145, "learning_rate": 0.00010172251874357757, "loss": 0.5535, "step": 378 }, { "epoch": 0.6237343053867963, "grad_norm": 3.905564785003662, "learning_rate": 9.658487462616794e-05, "loss": 0.5564, "step": 385 }, { "epoch": 0.6318347509113001, "eval_loss": 0.5684311389923096, "eval_runtime": 96.7227, "eval_samples_per_second": 5.169, "eval_steps_per_second": 0.651, "step": 390 }, { "epoch": 0.6350749291211016, "grad_norm": 2.8492631912231445, "learning_rate": 9.151803131832493e-05, "loss": 0.5624, "step": 392 }, { "epoch": 0.6464155528554071, "grad_norm": 3.4206936359405518, "learning_rate": 8.652870483177049e-05, "loss": 0.5444, "step": 399 }, { "epoch": 0.6528959092750102, "eval_loss": 0.5647120475769043, "eval_runtime": 12.2178, "eval_samples_per_second": 40.924, "eval_steps_per_second": 5.156, "step": 403 }, { "epoch": 0.6577561765897124, "grad_norm": 3.9881739616394043, "learning_rate": 8.162350843104291e-05, "loss": 0.5499, "step": 406 }, { "epoch": 0.6690968003240179, "grad_norm": 3.396562337875366, "learning_rate": 7.680894386773072e-05, "loss": 0.5431, "step": 413 }, { "epoch": 0.6739570676387201, "eval_loss": 0.5582295060157776, "eval_runtime": 108.0987, "eval_samples_per_second": 4.625, "eval_steps_per_second": 0.583, "step": 416 }, { "epoch": 0.6804374240583232, "grad_norm": 2.720857858657837, "learning_rate": 7.209139276251558e-05, "loss": 0.5387, "step": 420 }, { "epoch": 0.6917780477926286, "grad_norm": 2.7699272632598877, "learning_rate": 6.747710814644589e-05, "loss": 0.5311, "step": 427 }, { "epoch": 0.6950182260024301, "eval_loss": 0.5533111691474915, "eval_runtime": 97.6016, "eval_samples_per_second": 5.123, "eval_steps_per_second": 0.645, "step": 429 }, { "epoch": 0.703118671526934, "grad_norm": 3.2543938159942627, "learning_rate": 6.297220617265435e-05, "loss": 0.5219, "step": 434 }, { "epoch": 0.7144592952612394, "grad_norm": 3.016479015350342, "learning_rate": 5.858265800950438e-05, "loss": 0.5323, "step": 441 }, { "epoch": 0.7160793843661402, "eval_loss": 0.545806884765625, "eval_runtime": 12.2002, "eval_samples_per_second": 40.983, "eval_steps_per_second": 5.164, "step": 442 }, { "epoch": 0.7257999189955447, "grad_norm": 3.142430067062378, "learning_rate": 5.4314281925911634e-05, "loss": 0.5278, "step": 448 }, { "epoch": 0.7371405427298502, "grad_norm": 2.5312345027923584, "learning_rate": 5.0172735579330526e-05, "loss": 0.5172, "step": 455 }, { "epoch": 0.7371405427298502, "eval_loss": 0.53862464427948, "eval_runtime": 109.318, "eval_samples_per_second": 4.574, "eval_steps_per_second": 0.576, "step": 455 }, { "epoch": 0.7484811664641555, "grad_norm": 3.1494243144989014, "learning_rate": 4.616350851662895e-05, "loss": 0.5113, "step": 462 }, { "epoch": 0.7582017010935601, "eval_loss": 0.5340880751609802, "eval_runtime": 98.7219, "eval_samples_per_second": 5.065, "eval_steps_per_second": 0.638, "step": 468 }, { "epoch": 0.759821790198461, "grad_norm": 3.0264575481414795, "learning_rate": 4.229191489779047e-05, "loss": 0.535, "step": 469 }, { "epoch": 0.7711624139327663, "grad_norm": 3.485691547393799, "learning_rate": 3.8563086452088506e-05, "loss": 0.4989, "step": 476 }, { "epoch": 0.7792628594572701, "eval_loss": 0.5295507907867432, "eval_runtime": 12.2147, "eval_samples_per_second": 40.934, "eval_steps_per_second": 5.158, "step": 481 }, { "epoch": 0.7825030376670717, "grad_norm": 2.9964497089385986, "learning_rate": 3.498196567606959e-05, "loss": 0.5217, "step": 483 }, { "epoch": 0.7938436614013771, "grad_norm": 2.854008913040161, "learning_rate": 3.1553299282360966e-05, "loss": 0.4929, "step": 490 }, { "epoch": 0.8003240178209802, "eval_loss": 0.5264267325401306, "eval_runtime": 107.6368, "eval_samples_per_second": 4.645, "eval_steps_per_second": 0.585, "step": 494 }, { "epoch": 0.8051842851356824, "grad_norm": 2.8643412590026855, "learning_rate": 2.828163190798644e-05, "loss": 0.508, "step": 497 }, { "epoch": 0.8165249088699879, "grad_norm": 2.737133502960205, "learning_rate": 2.5171300090530106e-05, "loss": 0.5266, "step": 504 }, { "epoch": 0.8213851761846902, "eval_loss": 0.5213577747344971, "eval_runtime": 107.3148, "eval_samples_per_second": 4.659, "eval_steps_per_second": 0.587, "step": 507 }, { "epoch": 0.8278655326042932, "grad_norm": 2.559361457824707, "learning_rate": 2.2226426520131734e-05, "loss": 0.5014, "step": 511 }, { "epoch": 0.8392061563385986, "grad_norm": 3.189368486404419, "learning_rate": 1.9450914574933725e-05, "loss": 0.5075, "step": 518 }, { "epoch": 0.8424463345484001, "eval_loss": 0.518385112285614, "eval_runtime": 12.2167, "eval_samples_per_second": 40.928, "eval_steps_per_second": 5.157, "step": 520 }, { "epoch": 0.850546780072904, "grad_norm": 2.4905502796173096, "learning_rate": 1.6848443147221828e-05, "loss": 0.4834, "step": 525 }, { "epoch": 0.8618874038072094, "grad_norm": 2.5532329082489014, "learning_rate": 1.4422461767118233e-05, "loss": 0.4917, "step": 532 }, { "epoch": 0.8635074929121102, "eval_loss": 0.515020489692688, "eval_runtime": 102.4557, "eval_samples_per_second": 4.88, "eval_steps_per_second": 0.615, "step": 533 }, { "epoch": 0.8732280275415147, "grad_norm": 2.1145200729370117, "learning_rate": 1.2176186030289936e-05, "loss": 0.4893, "step": 539 }, { "epoch": 0.8845686512758202, "grad_norm": 3.142739772796631, "learning_rate": 1.011259333573326e-05, "loss": 0.5078, "step": 546 }, { "epoch": 0.8845686512758202, "eval_loss": 0.5123878121376038, "eval_runtime": 107.0251, "eval_samples_per_second": 4.672, "eval_steps_per_second": 0.589, "step": 546 }, { "epoch": 0.8959092750101255, "grad_norm": 2.60658860206604, "learning_rate": 8.234418939283866e-06, "loss": 0.4897, "step": 553 }, { "epoch": 0.9056298096395302, "eval_loss": 0.509852945804596, "eval_runtime": 12.2385, "eval_samples_per_second": 40.855, "eval_steps_per_second": 5.148, "step": 559 }, { "epoch": 0.907249898744431, "grad_norm": 3.024414300918579, "learning_rate": 6.544152328083152e-06, "loss": 0.5, "step": 560 }, { "epoch": 0.9185905224787363, "grad_norm": 3.256274700164795, "learning_rate": 5.044033920806933e-06, "loss": 0.4879, "step": 567 }, { "epoch": 0.9266909680032401, "eval_loss": 0.5081230998039246, "eval_runtime": 101.7485, "eval_samples_per_second": 4.914, "eval_steps_per_second": 0.619, "step": 572 }, { "epoch": 0.9299311462130417, "grad_norm": 3.299356698989868, "learning_rate": 3.7360520980297514e-06, "loss": 0.4923, "step": 574 }, { "epoch": 0.9412717699473471, "grad_norm": 2.953251361846924, "learning_rate": 2.6219405666614402e-06, "loss": 0.5007, "step": 581 }, { "epoch": 0.9477521263669502, "eval_loss": 0.5073325634002686, "eval_runtime": 101.2425, "eval_samples_per_second": 4.939, "eval_steps_per_second": 0.622, "step": 585 }, { "epoch": 0.9526123936816525, "grad_norm": 2.408773183822632, "learning_rate": 1.7031760619491353e-06, "loss": 0.4757, "step": 588 }, { "epoch": 0.9639530174159578, "grad_norm": 2.3097264766693115, "learning_rate": 9.809763900905875e-07, "loss": 0.4979, "step": 595 }, { "epoch": 0.9688132847306602, "eval_loss": 0.5070714950561523, "eval_runtime": 12.2216, "eval_samples_per_second": 40.911, "eval_steps_per_second": 5.155, "step": 598 }, { "epoch": 0.9752936411502633, "grad_norm": 2.2127528190612793, "learning_rate": 4.562988140535073e-07, "loss": 0.4892, "step": 602 }, { "epoch": 0.9866342648845686, "grad_norm": 2.4536213874816895, "learning_rate": 1.298387847403437e-07, "loss": 0.4991, "step": 609 }, { "epoch": 0.9898744430943702, "eval_loss": 0.5068374276161194, "eval_runtime": 97.1562, "eval_samples_per_second": 5.146, "eval_steps_per_second": 0.648, "step": 611 } ], "logging_steps": 7, "max_steps": 617, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 13, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 7.394413366880502e+17, "train_batch_size": 8, "trial_name": null, "trial_params": null }